Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 111 additions & 20 deletions plugins/google/src/realtime/realtime_api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ const OUTPUT_AUDIO_CHANNELS = 1;

// Numeric debug level read from the LK_GOOGLE_DEBUG environment variable; 0 when the variable is unset.
const LK_GOOGLE_DEBUG = Number(process.env.LK_GOOGLE_DEBUG ?? 0);

// Maximum number of tool call messages that get a rejection response while toolChoice="none".
// Once the count goes past this limit, further tool calls are left unanswered so that the
// reject -> retry cycle cannot loop forever. The count restarts when a new generation starts.
const MAX_TOOL_CALL_REJECTIONS = 3;

// WebSocket close codes (RFC 6455)
// 1000 indicates a normal closure.
const WS_CLOSE_NORMAL = 1000;
/**
Expand Down Expand Up @@ -105,6 +108,7 @@ interface RealtimeOptions {
thinkingConfig?: types.ThinkingConfig;
toolBehavior?: types.Behavior;
toolResponseScheduling?: types.FunctionResponseScheduling;
toolChoice?: llm.ToolChoice | null;
}

/**
Expand Down Expand Up @@ -472,6 +476,7 @@ export class RealtimeSession extends llm.RealtimeSession {
private toolCallStatuses = new Map<string, ToolCallStatus>();
private toolResponseCallIds = new WeakMap<types.FunctionResponse, string>();
private generationPendingTurnComplete?: ResponseGeneration;
// Number of tool call messages rejected since the last generation started; incremented by
// rejectToolCalls() and reset to 0 in startNewGeneration().
private rejectedToolCalls = 0;

#client: GoogleGenAI;
#task: Promise<void>;
Expand Down Expand Up @@ -565,21 +570,7 @@ export class RealtimeSession extends llm.RealtimeSession {

for (const item of ctx.items) {
if (item.type === 'function_call_output') {
const response: types.FunctionResponse = {
name: item.name,
response: { output: item.output },
};

if (this.options.toolResponseScheduling !== undefined) {
// vertexai currently doesn't support the scheduling parameter, gemini api defaults to idle
// it's the user's responsibility to avoid this parameter when using vertexai
response.scheduling = this.options.toolResponseScheduling;
}

if (!vertexai) {
// vertexai does not support id in FunctionResponse
response.id = item.callId;
}
const response = this.createFunctionResponse(item, vertexai);
this.toolResponseCallIds.set(response, item.callId);

const status = this.toolCallStatuses.get(item.callId);
Expand All @@ -596,6 +587,29 @@ export class RealtimeSession extends llm.RealtimeSession {
return toolResponses.length > 0 ? { functionResponses: toolResponses } : undefined;
}

/**
 * Converts a tool call output into the `FunctionResponse` shape sent to Gemini.
 *
 * @param output - Tool result; its text is reported under `error` when `isError` is set,
 *   otherwise under `output`.
 * @param vertexai - True when targeting Vertex AI, in which case the call id is left off.
 * @returns The function response, carrying the configured scheduling when one is set.
 */
private createFunctionResponse(
  output: llm.FunctionCallOutput,
  vertexai: boolean,
): types.FunctionResponse {
  const { toolResponseScheduling: scheduling } = this.options;
  const resultKey = output.isError ? 'error' : 'output';

  const functionResponse: types.FunctionResponse = {
    name: output.name,
    response: { [resultKey]: output.output },
  };

  // vertexai currently doesn't support the scheduling parameter, gemini api defaults to idle
  // it's the user's responsibility to avoid this parameter when using vertexai
  if (scheduling !== undefined) {
    functionResponse.scheduling = scheduling;
  }

  // vertexai does not support id in FunctionResponse
  if (!vertexai) {
    functionResponse.id = output.callId;
  }

  return functionResponse;
}

updateOptions(options: {
voice?: Voice | string;
temperature?: number;
Expand Down Expand Up @@ -629,7 +643,18 @@ export class RealtimeSession extends llm.RealtimeSession {
}

if (options.toolChoice !== undefined) {
this.#logger.warn('toolChoice is not supported by the Google Realtime API.');
// Gemini has no per-response toolChoice; "none" is emulated by rejecting any tool
// call emitted during the turn.
this.options.toolChoice = options.toolChoice;
if (options.toolChoice === 'none') {
this.#logger.warn(
"the Google Realtime API has no toolChoice='none'; tool calls emitted this turn will be rejected so the model replies directly.",
);
} else if (options.toolChoice !== null && options.toolChoice !== 'auto') {
this.#logger.warn(
`toolChoice='${options.toolChoice}' is not supported by the Google Realtime API, falling back to 'auto'.`,
);
}
}

if (shouldRestart) {
Expand Down Expand Up @@ -1217,6 +1242,13 @@ export class RealtimeSession extends llm.RealtimeSession {
unlock();
}

if (response.toolCall && this.options.toolChoice === 'none') {
// Reject without opening a generation, so a pending generateReply stays bound to the
// model's eventual reply and tools stay suppressed for the whole turn.
this.rejectToolCalls(response.toolCall.functionCalls ?? []);
return;
}
Comment on lines +1245 to +1250

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Early return in onReceiveMessage drops non-toolCall fields when toolChoice='none'

At lines 1245-1250, when response.toolCall is present and toolChoice === 'none', onReceiveMessage returns early after calling rejectToolCalls, so the rest of the message is never processed. This means any other fields on the same LiveServerMessage (like serverContent, sessionResumptionUpdate, usageMetadata, goAway, toolCallCancellation) are silently dropped. In the Gemini Live protocol, toolCall messages typically don't carry other significant fields alongside them, so this is unlikely to cause issues. However, sessionResumptionUpdate can theoretically accompany any message. If a session resumption handle is lost, it could affect reconnection reliability. This is a minor concern given the protocol's typical behavior.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


const shouldStartNewGeneration =
!this.currentGeneration || this.currentGeneration._done || !!this.pendingGenerationFut;
if (shouldStartNewGeneration) {
Expand Down Expand Up @@ -1490,6 +1522,7 @@ export class RealtimeSession extends llm.RealtimeSession {
}

private startNewGeneration(): void {
this.rejectedToolCalls = 0;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 rejectedToolCalls counter not reset when toolChoice changes away from 'none'

The rejectedToolCalls counter is only reset in startNewGeneration() at line 1525. If toolChoice is changed from 'none' to 'auto' via updateOptions() mid-turn (e.g. after the agent_activity resets it at agents/src/voice/agent_activity.ts:3922-3923), and a subsequent server message arrives before a new generation starts, the stale rejectedToolCalls > 0 could cause handleServerContent or handleUsageMetadata to incorrectly suppress content. However, in practice, the agent_activity resets toolChoice in a finally block after the generation task completes, and a new generation would reset the counter. The window for this race is very narrow — it would require the model to send content between the toolChoice reset and the next generation start. This is worth noting but unlikely to be hit in practice.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

const previousGen = this.currentGeneration;
const previousHadOpenFunctionChannel = previousGen && !previousGen.functionChannel.closed;

Expand Down Expand Up @@ -1569,8 +1602,15 @@ export class RealtimeSession extends llm.RealtimeSession {
}

private handleServerContent(serverContent: types.LiveServerContent): void {
if (!this.currentGeneration) {
this.#logger.warn('received server content but no active generation.');
if (!this.currentGeneration || (this.currentGeneration._done && this.rejectedToolCalls > 0)) {
if (this.rejectedToolCalls > 0) {
this.#logger.debug(
{ serverContent },
'ignoring server content from a rejected tool call turn',
);
} else {
this.#logger.warn('received server content but no active generation.');
}
return;
}

Expand Down Expand Up @@ -1677,6 +1717,43 @@ export class RealtimeSession extends llm.RealtimeSession {
}
}

/**
 * Answers tool calls that arrived while toolChoice='none' with an error response, so the
 * model is told to reply to the user directly.
 *
 * Each non-empty batch counts as one rejection. Up to MAX_TOOL_CALL_REJECTIONS batches are
 * answered; after that nothing is sent back, and an error is logged once, on the first
 * batch past the limit.
 *
 * @param functionCalls - Function calls from the server's tool call message; an empty list
 *   is ignored and does not count as a rejection.
 */
private rejectToolCalls(functionCalls: types.FunctionCall[]): void {
  if (!functionCalls.length) {
    return;
  }

  this.rejectedToolCalls += 1;
  const attempt = this.rejectedToolCalls;
  const functions = functionCalls.map(({ name }) => name);

  if (attempt <= MAX_TOOL_CALL_REJECTIONS) {
    this.#logger.warn({ functions }, "rejecting tool call requested while toolChoice='none'");

    const isVertex = this.options.vertexai;
    const functionResponses: types.FunctionResponse[] = [];
    for (const call of functionCalls) {
      const rejection = new llm.FunctionCallOutput({
        name: call.name ?? '',
        callId: call.id ?? '',
        output: 'Tool calls are disabled for this turn, respond to the user directly.',
        isError: true,
      });
      functionResponses.push(this.createFunctionResponse(rejection, isVertex));
    }

    this.sendClientEvent({
      type: 'tool_response',
      value: { functionResponses },
    });
    return;
  }

  // Stop responding to break the loop; the user can still interrupt by voice.
  // Only the first call past the limit is reported, to keep the log quiet.
  if (attempt === MAX_TOOL_CALL_REJECTIONS + 1) {
    this.#logger.error(
      { functions },
      `model keeps calling tools despite toolChoice='none'; stopping after ${MAX_TOOL_CALL_REJECTIONS} rejections to avoid a loop`,
    );
  }
}

private handleToolCall(toolCall: types.LiveServerToolCall): void {
if (!this.currentGeneration) {
this.#logger.warn('received tool call but no active generation.');
Expand Down Expand Up @@ -1768,8 +1845,12 @@ export class RealtimeSession extends llm.RealtimeSession {
}

private handleUsageMetadata(usage: types.UsageMetadata): void {
if (!this.currentGeneration) {
this.#logger.debug('Received usage metadata but no active generation');
if (!this.currentGeneration || (this.currentGeneration._done && this.rejectedToolCalls > 0)) {
if (this.rejectedToolCalls > 0) {
this.#logger.debug('ignoring usage metadata from a rejected tool call turn');
} else {
this.#logger.debug('Received usage metadata but no active generation');
}
return;
}

Expand Down Expand Up @@ -1884,6 +1965,16 @@ export class RealtimeSession extends llm.RealtimeSession {
if (this.earlyCompletionPending) {
return false;
}
if (this.rejectedToolCalls > 0 && response.serverContent) {
const serverContent = response.serverContent;
const hasModelOutput =
!!serverContent.modelTurn ||
serverContent.outputTranscription != null ||
serverContent.inputTranscription != null;
if (!hasModelOutput) {
return false;
}
}
if (response.toolCall) {
return true;
}
Expand Down