Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/preserve-transcription-item-id.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@livekit/agents': patch
---

Preserve realtime transcription item IDs on user input transcription events.
24 changes: 24 additions & 0 deletions agents/src/voice/agent_activity.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { Future, Task } from '../utils.js';
import { _getActivityTaskInfo } from './agent.js';
import { AgentActivity } from './agent_activity.js';
import type { PreemptiveGenerationInfo } from './audio_recognition.js';
import type { AgentSessionEventTypes, UserInputTranscribedEvent } from './events.js';
import { SpeechHandle } from './speech_handle.js';

const agentMocks = vi.hoisted(() => ({
Expand Down Expand Up @@ -132,6 +133,29 @@ function buildMainTaskRunner() {
}

describe('AgentActivity - mainTask', () => {
it('preserves realtime user input transcription item IDs', () => {
  // Every payload passed to the stubbed session `emit` is recorded here.
  const emitted: UserInputTranscribedEvent[] = [];
  const recordEmit = (_type: AgentSessionEventTypes, ev: UserInputTranscribedEvent) => {
    emitted.push(ev);
  };

  // Create the instance straight from the prototype so the constructor never runs;
  // the only collaborator supplied is an `agentSession` exposing the recording `emit`.
  const activity = Object.create(AgentActivity.prototype) as AgentActivity;
  Object.assign(activity, { agentSession: { emit: recordEmit } });

  // Feed a non-final realtime transcription that carries an item ID.
  const transcription = { itemId: 'item_123', transcript: 'hello', isFinal: false };
  activity.onInputAudioTranscriptionCompleted(transcription);

  // Exactly one event must be emitted, and it must mirror the input, including `itemId`.
  expect(emitted).toHaveLength(1);
  const event = emitted[0];
  expect(event?.transcript).toBe('hello');
  expect(event?.isFinal).toBe(false);
  expect(event?.itemId).toBe('item_123');
});

it('should recover when speech handle is interrupted after authorization', async () => {
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();

Expand Down
1 change: 1 addition & 0 deletions agents/src/voice/agent_activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1168,6 +1168,7 @@ export class AgentActivity implements RecognitionHooks {
createUserInputTranscribedEvent({
transcript: ev.transcript,
isFinal: ev.isFinal,
itemId: ev.itemId,
}),
);

Expand Down
5 changes: 5 additions & 0 deletions agents/src/voice/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ export type UserInputTranscribedEvent = {
type: 'user_input_transcribed';
transcript: string;
isFinal: boolean;
/** Provider-specific ID for the transcribed input item, when available. */
itemId: string | null;
// TODO(AJS-106): add multi participant support
/** Not supported yet. Always null by default. */
speakerId: string | null;
Expand All @@ -104,19 +106,22 @@ export type UserInputTranscribedEvent = {
export const createUserInputTranscribedEvent = ({
transcript,
isFinal,
itemId = null,
speakerId = null,
language = null,
createdAt = Date.now(),
}: {
transcript: string;
isFinal: boolean;
itemId?: string | null;
speakerId?: string | null;
language?: LanguageCode | null;
createdAt?: number;
}): UserInputTranscribedEvent => ({
type: 'user_input_transcribed',
transcript,
isFinal,
itemId,
speakerId,
language,
createdAt,
Expand Down
Loading