model/openai: sanitize empty tool_calls chunks (#745)

bytethm · web-flow · commit 42a1bba5c615 · 2025-11-25T14:57:13.000+08:00
diff --git a/model/openai/openai.go b/model/openai/openai.go
@@ -1252,7 +1252,12 @@ func (m *Model) handleStreamingResponse(
 		// Always accumulate for correctness (tool call deltas are assembled later),
 		// but skip chunks with reasoning content that would cause the SDK accumulator to panic.
 		if !m.hasReasoningContent(chunk.Choices) {
-			acc.AddChunk(chunk)
+			// Sanitize chunks before feeding them into the upstream accumulator to
+			// avoid known panics when JSON.ToolCalls is marked present but the
+			// typed ToolCalls slice is empty, especially on finish_reason chunks.
+			sanitizedChunk := sanitizeChunkForAccumulator(chunk)
+
+			acc.AddChunk(sanitizedChunk)
 			if m.accumulateChunkUsage != nil {
 				accUsage, chunkUsage := completionUsageToModelUsage(acc.Usage), completionUsageToModelUsage(chunk.Usage)
 				usage := inverseOPENAISKDAddChunkUsage(accUsage, chunkUsage)
@@ -1304,6 +1309,44 @@ func (m *Model) handleStreamingResponse(
 	}
 }
 
+// sanitizeChunkForAccumulator returns a chunk that is safe to feed into the
+// upstream OpenAI SDK accumulator. When the first choice marks JSON.ToolCalls
+// as present while its typed ToolCalls slice is empty, the accumulator can
+// perform an out-of-range access in chatCompletionResponseState.update; in
+// that case a copy with the ToolCalls metadata cleared is returned. Any other
+// chunk is returned as-is, and the caller's chunk is never modified.
+func sanitizeChunkForAccumulator(chunk openai.ChatCompletionChunk) openai.ChatCompletionChunk {
+	if len(chunk.Choices) == 0 {
+		return chunk
+	}
+
+	choice := chunk.Choices[0]
+	delta := choice.Delta
+	if !delta.JSON.ToolCalls.Valid() || len(delta.ToolCalls) != 0 {
+		return chunk
+	}
+
+	// A present-but-empty tool_calls field is treated as unsafe when either:
+	//   - finish_reason is set (e.g. "tool_calls" or "stop"), or
+	//   - neither content nor refusal is present, so nothing else in the delta
+	//     steers the accumulator away from its tool-call path.
+	// NOTE(review): the second case assumes upstream checks content and refusal
+	// before tool_calls - confirm against the vendored openai-go version.
+	hasText := delta.JSON.Content.Valid() || delta.JSON.Refusal.Valid()
+	if choice.FinishReason == "" && hasText {
+		return chunk
+	}
+
+	// Copy the Choices slice so the caller's chunk stays untouched, then drop
+	// the ToolCalls metadata on the first choice only; finish_reason, usage and
+	// every other choice are carried over unchanged.
+	sanitized := chunk
+	sanitized.Choices = make([]openai.ChatCompletionChunkChoice, len(chunk.Choices))
+	copy(sanitized.Choices, chunk.Choices)
+	sanitized.Choices[0].Delta.JSON.ToolCalls = respjson.Field{}
+	return sanitized
+}
+
 // updateToolCallIndexMapping updates the tool call index mapping.
 func (m *Model) updateToolCallIndexMapping(chunk openai.ChatCompletionChunk, idToIndexMap map[string]int) {
 	if len(chunk.Choices) > 0 && len(chunk.Choices[0].Delta.ToolCalls) > 0 {
diff --git a/model/openai/openai_test.go b/model/openai/openai_test.go
@@ -11,6 +11,7 @@ package openai
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -3892,6 +3893,121 @@ func TestToolCallIndexMapping(t *testing.T) {
 	})
 }
 
+// TestChatCompletionAccumulator_ToolCallsEmpty_Panics pins down the upstream
+// openai-go behavior that this package has to guard against: adding a chunk
+// whose JSON.ToolCalls is marked present while the typed ToolCalls slice is
+// empty is expected to make the accumulator panic.
+func TestChatCompletionAccumulator_ToolCallsEmpty_Panics(t *testing.T) {
+	// Streaming payload in which the provider sends an empty tool_calls
+	// array together with a "tool_calls" finish_reason.
+	payload := []byte(`{
+		"id": "test",
+		"object": "chat.completion.chunk",
+		"created": 1699200000,
+		"model": "gpt-3.5-turbo",
+		"choices": [
+			{
+				"index": 0,
+				"delta": {
+					"tool_calls": []
+				},
+				"finish_reason": "tool_calls"
+			}
+		]
+	}`)
+
+	var parsed openai.ChatCompletionChunk
+	require.NoError(t, json.Unmarshal(payload, &parsed), "failed to unmarshal test chunk")
+
+	defer func() {
+		if recover() != nil {
+			return // the accumulator panicked, which is what this test expects
+		}
+		t.Fatalf("expected panic when adding chunk with JSON.ToolCalls valid and empty ToolCalls slice, but no panic occurred")
+	}()
+	var accumulator openai.ChatCompletionAccumulator
+	accumulator.AddChunk(parsed)
+}
+
+// TestSanitizeChunkForAccumulator_FinishReasonToolCalls covers the unsafe
+// pattern: a chunk with a finish_reason, JSON.ToolCalls marked present and an
+// empty typed ToolCalls slice must come back from sanitizeChunkForAccumulator
+// with the ToolCalls metadata cleared, while the input chunk stays as it was.
+func TestSanitizeChunkForAccumulator_FinishReasonToolCalls(t *testing.T) {
+	payload := []byte(`{
+		"id": "test",
+		"object": "chat.completion.chunk",
+		"created": 1699200000,
+		"model": "gpt-3.5-turbo",
+		"choices": [
+			{
+				"index": 0,
+				"delta": {
+					"content": "",
+					"tool_calls": []
+				},
+				"finish_reason": "tool_calls"
+			}
+		]
+	}`)
+
+	var input openai.ChatCompletionChunk
+	require.NoError(t, json.Unmarshal(payload, &input))
+	require.Len(t, input.Choices, 1)
+	before := input.Choices[0] // snapshot used to check the preconditions
+	require.Equal(t, "tool_calls", before.FinishReason)
+	require.True(t, before.Delta.JSON.ToolCalls.Valid())
+	require.Len(t, before.Delta.ToolCalls, 0)
+
+	output := sanitizeChunkForAccumulator(input)
+	// The caller's chunk must not have been mutated by the call.
+	require.True(t, input.Choices[0].Delta.JSON.ToolCalls.Valid())
+
+	// Only the ToolCalls metadata is dropped; finish_reason and the empty slice stay.
+	require.Len(t, output.Choices, 1)
+	after := output.Choices[0]
+	assert.Equal(t, "tool_calls", after.FinishReason)
+	assert.False(t, after.Delta.JSON.ToolCalls.Valid())
+	assert.Len(t, after.Delta.ToolCalls, 0)
+}
+
+// TestSanitizeChunkForAccumulator_NoFinishReason checks the pass-through
+// path: a chunk that has no finish_reason and carries content next to an
+// empty tool_calls array must be returned unmodified, since such a chunk is
+// considered safe for the accumulator.
+func TestSanitizeChunkForAccumulator_NoFinishReason(t *testing.T) {
+	payload := []byte(`{
+		"id": "test",
+		"object": "chat.completion.chunk",
+		"created": 1699200000,
+		"model": "gpt-3.5-turbo",
+		"choices": [
+			{
+				"index": 0,
+				"delta": {
+					"content": "hello",
+					"tool_calls": []
+				},
+				"finish_reason": null
+			}
+		]
+	}`)
+
+	var input openai.ChatCompletionChunk
+	require.NoError(t, json.Unmarshal(payload, &input))
+	require.Len(t, input.Choices, 1)
+	first := input.Choices[0] // snapshot used to check the preconditions
+	require.Equal(t, "", first.FinishReason)
+	require.True(t, first.Delta.JSON.ToolCalls.Valid())
+	require.Len(t, first.Delta.ToolCalls, 0)
+
+	output := sanitizeChunkForAccumulator(input)
+	// Without a finish_reason the chunk must come back exactly as it went in.
+	assert.Equal(t, input, output)
+	assert.True(t, output.Choices[0].Delta.JSON.ToolCalls.Valid())
+	assert.Len(t, output.Choices[0].Delta.ToolCalls, 0)
+}
+
 // TestStreamingCallbackIntegration tests the integration of streaming callbacks.
 func TestStreamingCallbackIntegration(t *testing.T) {
 	t.Run("streaming with chat stream complete callback", func(t *testing.T) {