Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/bright-fox-hill.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'@openai/agents-extensions': patch
'@openai/agents-realtime': patch
---

Fix: clamp and floor `audio_end_ms` in interrupts to prevent a Realtime API error when fractional speeds are used (#315)
34 changes: 31 additions & 3 deletions packages/agents-extensions/test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,37 @@ describe('TwilioRealtimeTransportLayer', () => {
transport._audioLengthMs = 500;
transport._interrupt(0, true);

const call = sendEventSpy.mock.calls.find(
(c) => c[0]?.type === 'conversation.item.truncate',
);
const call = sendEventSpy.mock.calls
.filter((c) => c[0]?.type === 'conversation.item.truncate')
.at(-1);
expect(call?.[0].audio_end_ms).toBe(50);
});

// With a fractional session speed (1.1) and a 20 ms audio length, the last
// truncate event sent by an interrupt is expected to carry an integer
// `audio_end_ms` equal to 20.
test('interrupt clamps overshoot and emits integer audio_end_ms', async () => {
  const fakeSocket = new FakeTwilioWebSocket();
  const layer = new TwilioRealtimeTransportLayer({
    twilioWebSocket: fakeSocket as any,
  });
  const sendEventSpy = vi.spyOn(
    layer as TwilioRealtimeTransportLayer,
    'sendEvent',
  );

  await layer.connect({
    apiKey: 'ek_test',
    initialSessionConfig: { speed: 1.1 },
  });
  // Discard calls recorded while connecting so that only events produced by
  // the interrupt below are inspected.
  sendEventSpy.mockClear();

  // @ts-expect-error - we're testing protected fields.
  layer._audioLengthMs = 20;
  layer._interrupt(0, true);

  // Only the most recent truncate event is of interest.
  const truncateCalls = sendEventSpy.mock.calls.filter(
    (args) => args[0]?.type === 'conversation.item.truncate',
  );
  const lastTruncate = truncateCalls.at(-1);
  const audioEndMs = lastTruncate?.[0].audio_end_ms;

  expect(audioEndMs).toBe(20);
  expect(Number.isInteger(audioEndMs)).toBe(true);
});
});
7 changes: 5 additions & 2 deletions packages/agents-realtime/src/openaiRealtimeWebsocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ export class OpenAIRealtimeWebSocket
* @param elapsedTime - The elapsed time since the response started.
*/
_interrupt(elapsedTime: number, cancelOngoingResponse: boolean = true) {
if (elapsedTime < 0 || elapsedTime > this._audioLengthMs) {
if (elapsedTime < 0) {
return;
}

Expand All @@ -369,12 +369,15 @@ export class OpenAIRealtimeWebSocket
this._cancelResponse();
}

const length = this._audioLengthMs ?? Number.POSITIVE_INFINITY;
const audio_end_ms = Math.max(0, Math.min(Math.floor(elapsedTime), length));

this.emit('audio_interrupted');
this.sendEvent({
type: 'conversation.item.truncate',
item_id: this.#currentItemId,
content_index: this.#currentAudioContentIndex,
audio_end_ms: elapsedTime,
audio_end_ms,
});
}

Expand Down
117 changes: 110 additions & 7 deletions packages/agents-realtime/test/openaiRealtimeWebsocket.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ describe('OpenAIRealtimeWebSocket', () => {
const ws = new OpenAIRealtimeWebSocket();
const audioSpy = vi.fn();
ws.on('audio', audioSpy);
const sendSpy = vi.spyOn(ws, 'sendEvent');
const sendSpy = vi
.spyOn(ws as any, 'sendEvent')
.mockImplementation(() => {});
const interruptSpy = vi.spyOn(ws, 'interrupt');
const p = ws.connect({ apiKey: 'ek', model: 'm' });
await vi.runAllTimersAsync();
Expand Down Expand Up @@ -109,11 +111,13 @@ describe('OpenAIRealtimeWebSocket', () => {
});
expect(interruptSpy).toHaveBeenCalled();
expect(
sendSpy.mock.calls.some((c) => c[0].type === 'response.cancel'),
sendSpy.mock.calls.some(
(c: unknown[]) => (c[0] as any).type === 'response.cancel',
),
).toBe(true);
expect(
sendSpy.mock.calls.some(
(c) => c[0].type === 'conversation.item.truncate',
(c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
),
).toBe(true);

Expand Down Expand Up @@ -147,7 +151,9 @@ describe('OpenAIRealtimeWebSocket', () => {
}),
});
expect(
sendSpy.mock.calls.every((c) => c[0].type !== 'response.cancel'),
sendSpy.mock.calls.every(
(c: unknown[]) => (c[0] as any).type !== 'response.cancel',
),
).toBe(true);
});

Expand All @@ -158,7 +164,9 @@ describe('OpenAIRealtimeWebSocket', () => {

it('close resets state so interrupt does nothing', async () => {
const ws = new OpenAIRealtimeWebSocket();
const sendSpy = vi.spyOn(ws, 'sendEvent');
const sendSpy = vi
.spyOn(OpenAIRealtimeWebSocket.prototype as any, 'sendEvent')
.mockImplementation(() => {});
const p = ws.connect({ apiKey: 'ek', model: 'm' });
await vi.runAllTimersAsync();
await p;
Expand Down Expand Up @@ -197,6 +205,99 @@ describe('OpenAIRealtimeWebSocket', () => {
expect(baseSpy).toHaveBeenCalled();
});

// Covers both halves of the `audio_end_ms` computation: an elapsed time past
// the audio length is capped at that length, and a fractional elapsed time
// inside the audio is rounded down to a whole millisecond.
it('_interrupt quantizes and clamps elapsedTime', () => {
  const ws = new OpenAIRealtimeWebSocket();
  const sendSpy = vi
    .spyOn(OpenAIRealtimeWebSocket.prototype as any, 'sendEvent')
    .mockImplementation(() => {});
  // The stub lives on the prototype, so restore it in `finally`; otherwise a
  // failing expectation would leave `sendEvent` stubbed for later tests.
  try {
    // @ts-expect-error - testing protected field.
    ws._audioLengthMs = 100;
    ws._interrupt(110.9, false);
    let call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    // Fail with a clear assertion (not a TypeError) if no truncate was sent.
    expect(call).toBeDefined();
    expect((call?.[0] as any).audio_end_ms).toBe(100);

    sendSpy.mockClear();
    // @ts-expect-error - testing protected field.
    ws._audioLengthMs = 200;
    ws._interrupt(123.7, false);
    call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    expect(call).toBeDefined();
    expect((call?.[0] as any).audio_end_ms).toBe(123);
  } finally {
    sendSpy.mockRestore();
  }
});

// An elapsed time below one millisecond must be sent as the integer 0.
it('_interrupt floors sub-millisecond elapsedTime', () => {
  const ws = new OpenAIRealtimeWebSocket();
  const sendSpy = vi
    .spyOn(OpenAIRealtimeWebSocket.prototype as any, 'sendEvent')
    .mockImplementation(() => {});
  // The stub lives on the prototype, so restore it in `finally`; otherwise a
  // failing expectation would leave `sendEvent` stubbed for later tests.
  try {
    // @ts-expect-error - testing protected field.
    ws._audioLengthMs = 100;
    ws._interrupt(0.9, false);
    const call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    // Fail with a clear assertion (not a TypeError) if no truncate was sent.
    expect(call).toBeDefined();
    const audioEndMs = (call?.[0] as any).audio_end_ms;
    expect(audioEndMs).toBe(0);
    expect(Number.isInteger(audioEndMs)).toBe(true);
  } finally {
    sendSpy.mockRestore();
  }
});

// An elapsed time beyond the audio length must be capped at that length and
// sent as an integer.
it('_interrupt clamps overshoot elapsedTime', () => {
  const ws = new OpenAIRealtimeWebSocket();
  const sendSpy = vi
    .spyOn(OpenAIRealtimeWebSocket.prototype as any, 'sendEvent')
    .mockImplementation(() => {});
  // The stub lives on the prototype, so restore it in `finally`; otherwise a
  // failing expectation would leave `sendEvent` stubbed for later tests.
  try {
    // @ts-expect-error - testing protected field.
    ws._audioLengthMs = 42;
    ws._interrupt(42.6, false);
    let call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    // Fail with a clear assertion (not a TypeError) if no truncate was sent.
    expect(call).toBeDefined();
    expect((call?.[0] as any).audio_end_ms).toBe(42);
    expect(Number.isInteger((call?.[0] as any).audio_end_ms)).toBe(true);

    // 42.6 already floors to 42, so the case above would pass even without
    // clamping. An overshoot of more than one millisecond pins the clamp.
    sendSpy.mockClear();
    ws._interrupt(99.9, false);
    call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    expect(call).toBeDefined();
    expect((call?.[0] as any).audio_end_ms).toBe(42);
  } finally {
    sendSpy.mockRestore();
  }
});

// After connecting with a fractional session speed (1.1), a fractional
// elapsed time must still produce an integer `audio_end_ms`.
it('interrupt payload is integer with fractional speed', async () => {
  const ws = new OpenAIRealtimeWebSocket();
  const sendSpy = vi
    .spyOn(OpenAIRealtimeWebSocket.prototype as any, 'sendEvent')
    .mockImplementation(() => {});
  // The stub lives on the prototype, so restore it in `finally`; otherwise a
  // failing expectation (or a rejected connect) would leave `sendEvent`
  // stubbed for later tests.
  try {
    const p = ws.connect({
      apiKey: 'ek',
      model: 'm',
      initialSessionConfig: { speed: 1.1 },
    } as any);
    await vi.runAllTimersAsync();
    await p;
    // @ts-expect-error - testing protected field.
    ws._audioLengthMs = 200;
    ws._interrupt(123.4, false);
    const call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    // Fail with a clear assertion (not a TypeError) if no truncate was sent.
    expect(call).toBeDefined();
    expect(Number.isInteger((call?.[0] as any).audio_end_ms)).toBe(true);
  } finally {
    sendSpy.mockRestore();
  }
});

// Without any session speed configured, a fractional elapsed time inside the
// audio must be floored to an integer `audio_end_ms`.
it('interrupt payload is integer with speed 1', () => {
  const ws = new OpenAIRealtimeWebSocket();
  const sendSpy = vi
    .spyOn(OpenAIRealtimeWebSocket.prototype as any, 'sendEvent')
    .mockImplementation(() => {});
  // The stub lives on the prototype, so restore it in `finally`; otherwise a
  // failing expectation would leave `sendEvent` stubbed for later tests.
  try {
    // @ts-expect-error - testing protected field.
    ws._audioLengthMs = 200;
    ws._interrupt(123.4, false);
    const call = sendSpy.mock.calls.find(
      (c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
    );
    // Fail with a clear assertion (not a TypeError) if no truncate was sent.
    expect(call).toBeDefined();
    const audioEndMs = (call?.[0] as any).audio_end_ms;
    expect(audioEndMs).toBe(123);
    expect(Number.isInteger(audioEndMs)).toBe(true);
  } finally {
    sendSpy.mockRestore();
  }
});

it('full interrupt/_interrupt flow', async () => {
const ws = new OpenAIRealtimeWebSocket();
const sendSpy = vi.spyOn(ws, 'sendEvent');
Expand Down Expand Up @@ -230,11 +331,13 @@ describe('OpenAIRealtimeWebSocket', () => {
}),
});
expect(
sendSpy.mock.calls.some((c) => c[0].type === 'response.cancel'),
sendSpy.mock.calls.some(
(c: unknown[]) => (c[0] as any).type === 'response.cancel',
),
).toBe(true);
expect(
sendSpy.mock.calls.some(
(c) => c[0].type === 'conversation.item.truncate',
(c: unknown[]) => (c[0] as any).type === 'conversation.item.truncate',
),
).toBe(true);
sendSpy.mockClear();
Expand Down