Skip to content

Commit 8d3310e

Browse files
authored
fix: move stop sequence decoding logic out of upstream filter (envoyproxy#1238)
**Description** Currently the `stop` field is defined as `any` type. When the `stop` field is passed as a string array, it is decoded as `[]any`, which then errors out in the `processStop` function in the upstream filter. The fix is to define the stop union type on the OpenAI-compatible API schema and decode it right there instead of in the translator. **Related Issues/PRs (if applicable)** Fixes envoyproxy#1237 --------- Signed-off-by: Dan Sun <[email protected]>
1 parent 40544e2 commit 8d3310e

File tree

13 files changed

+183
-186
lines changed

13 files changed

+183
-186
lines changed

internal/apischema/awsbedrock/awsbedrock.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ type InferenceConfiguration struct {
4646

4747
// A list of stop sequences. A stop sequence is a sequence of characters that
4848
// causes the model to stop generating the response.
49-
StopSequences []*string `json:"stopSequences,omitempty"`
49+
StopSequences []string `json:"stopSequences,omitempty"`
5050

5151
// The likelihood of the model selecting higher-probability options while generating
5252
// a response. A lower value makes the model more likely to choose higher-probability

internal/apischema/openai/openai.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ type ChatCompletionRequest struct {
874874
// Stop string / array / null Defaults to null
875875
// Up to 4 sequences where the API will stop generating further tokens.
876876
// Docs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-stop
877-
Stop any `json:"stop,omitempty"`
877+
Stop openai.ChatCompletionNewParamsStopUnion `json:"stop,omitzero"`
878878

879879
// Stream: If set, partial message deltas will be sent, like in ChatGPT.
880880
// Docs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream

internal/apischema/openai/openai_test.go

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import (
1111
"time"
1212

1313
"github.com/google/go-cmp/cmp"
14+
"github.com/google/go-cmp/cmp/cmpopts"
1415
"github.com/openai/openai-go/v2"
16+
"github.com/openai/openai-go/v2/packages/param"
1517
"github.com/stretchr/testify/require"
1618
"k8s.io/utils/ptr"
1719
)
@@ -381,10 +383,12 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
381383
},
382384
MaxCompletionTokens: ptr.To[int64](1024),
383385
ParallelToolCalls: ptr.To(true),
384-
Stop: []any{"\n", "stop"},
385-
ServiceTier: openai.ChatCompletionNewParamsServiceTierFlex,
386-
Verbosity: openai.ChatCompletionNewParamsVerbosityLow,
387-
ReasoningEffort: openai.ReasoningEffortLow,
386+
Stop: openai.ChatCompletionNewParamsStopUnion{
387+
OfStringArray: []string{"\n", "stop"},
388+
},
389+
ServiceTier: openai.ChatCompletionNewParamsServiceTierFlex,
390+
Verbosity: openai.ChatCompletionNewParamsVerbosityLow,
391+
ReasoningEffort: openai.ReasoningEffortLow,
388392
},
389393
},
390394
{
@@ -404,7 +408,31 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
404408
},
405409
},
406410
},
407-
Stop: "stop",
411+
Stop: openai.ChatCompletionNewParamsStopUnion{
412+
OfString: openai.Opt[string]("stop"),
413+
},
414+
},
415+
},
416+
{
417+
name: "stop as array",
418+
in: []byte(`{
419+
"model": "gpu-o4",
420+
"messages": [{"role": "user", "content": "hello"}],
421+
"stop": ["</s>", "__end_tag__", "<|eot_id|>", "[answer_end]"]
422+
}`),
423+
out: &ChatCompletionRequest{
424+
Model: "gpu-o4",
425+
Messages: []ChatCompletionMessageParamUnion{
426+
{
427+
OfUser: &ChatCompletionUserMessageParam{
428+
Role: ChatMessageRoleUser,
429+
Content: StringOrUserRoleContentUnion{Value: "hello"},
430+
},
431+
},
432+
},
433+
Stop: openai.ChatCompletionNewParamsStopUnion{
434+
OfStringArray: []string{"</s>", "__end_tag__", "<|eot_id|>", "[answer_end]"},
435+
},
408436
},
409437
},
410438
{
@@ -438,8 +466,10 @@ func TestOpenAIChatCompletionMessageUnmarshal(t *testing.T) {
438466
return
439467
}
440468
require.NoError(t, err)
441-
if !cmp.Equal(&chatCompletion, tc.out) {
442-
t.Errorf("UnmarshalOpenAIRequest(), diff(got, expected) = %s\n", cmp.Diff(&chatCompletion, tc.out))
469+
if !cmp.Equal(&chatCompletion, tc.out,
470+
cmpopts.IgnoreUnexported(openai.ChatCompletionNewParamsStopUnion{}, param.Opt[string]{})) {
471+
t.Errorf("UnmarshalOpenAIRequest(), diff(got, expected) = %s\n", cmp.Diff(&chatCompletion, tc.out,
472+
cmpopts.IgnoreUnexported(openai.ChatCompletionNewParamsStopUnion{}, param.Opt[string]{})))
443473
}
444474
})
445475
}

internal/apischema/openai/vendor_fields_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"github.com/anthropics/anthropic-sdk-go"
1313
"github.com/google/go-cmp/cmp"
1414
"github.com/google/go-cmp/cmp/cmpopts"
15+
"github.com/openai/openai-go/v2"
16+
"github.com/openai/openai-go/v2/packages/param"
1517
"github.com/stretchr/testify/require"
1618
"google.golang.org/genai"
1719
"k8s.io/utils/ptr"
@@ -233,7 +235,8 @@ func TestChatCompletionRequest_VendorFieldsExtraction(t *testing.T) {
233235
}
234236

235237
require.NoError(t, err)
236-
if diff := cmp.Diff(tt.expected, &actual, cmpopts.IgnoreUnexported(anthropic.ThinkingConfigEnabledParam{}, anthropic.ThinkingConfigParamUnion{})); diff != "" {
238+
if diff := cmp.Diff(tt.expected, &actual, cmpopts.IgnoreUnexported(anthropic.ThinkingConfigEnabledParam{}, anthropic.ThinkingConfigParamUnion{},
239+
openai.ChatCompletionNewParamsStopUnion{}, param.Opt[string]{})); diff != "" {
237240
t.Errorf("ChatCompletionRequest mismatch (-expected +actual):\n%s", diff)
238241
}
239242
})

internal/extproc/translator/gemini_helper.go

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -442,18 +442,10 @@ func openAIReqToGeminiGenerationConfig(openAIReq *openai.ChatCompletionRequest)
442442
if openAIReq.FrequencyPenalty != nil {
443443
gc.FrequencyPenalty = openAIReq.FrequencyPenalty
444444
}
445-
stopSeq, err := processStop(openAIReq.Stop)
446-
if err != nil {
447-
return nil, err
448-
}
449-
if len(stopSeq) > 0 {
450-
var stops []string
451-
for _, s := range stopSeq {
452-
if s != nil {
453-
stops = append(stops, *s)
454-
}
455-
}
456-
gc.StopSequences = stops
445+
if openAIReq.Stop.OfString.Valid() {
446+
gc.StopSequences = []string{openAIReq.Stop.OfString.String()}
447+
} else if openAIReq.Stop.OfStringArray != nil {
448+
gc.StopSequences = openAIReq.Stop.OfStringArray
457449
}
458450
return gc, nil
459451
}

internal/extproc/translator/gemini_helper_test.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/google/go-cmp/cmp"
1313
"github.com/google/go-cmp/cmp/cmpopts"
14+
openaigo "github.com/openai/openai-go/v2"
1415
"github.com/stretchr/testify/assert"
1516
"github.com/stretchr/testify/require"
1617
"google.golang.org/genai"
@@ -737,7 +738,9 @@ func TestOpenAIReqToGeminiGenerationConfig(t *testing.T) {
737738
MaxTokens: ptr.To(int64(256)),
738739
PresencePenalty: ptr.To(float32(1.1)),
739740
FrequencyPenalty: ptr.To(float32(0.5)),
740-
Stop: []*string{ptr.To("stop1"), ptr.To("stop2")},
741+
Stop: openaigo.ChatCompletionNewParamsStopUnion{
742+
OfStringArray: []string{"stop1", "stop2"},
743+
},
741744
},
742745
expectedGenerationConfig: &genai.GenerationConfig{
743746
Temperature: ptr.To(float32(0.7)),
@@ -757,6 +760,17 @@ func TestOpenAIReqToGeminiGenerationConfig(t *testing.T) {
757760
input: &openai.ChatCompletionRequest{},
758761
expectedGenerationConfig: &genai.GenerationConfig{},
759762
},
763+
{
764+
name: "stop sequences",
765+
input: &openai.ChatCompletionRequest{
766+
Stop: openaigo.ChatCompletionNewParamsStopUnion{
767+
OfString: openaigo.Opt[string]("stop1"),
768+
},
769+
},
770+
expectedGenerationConfig: &genai.GenerationConfig{
771+
StopSequences: []string{"stop1"},
772+
},
773+
},
760774
{
761775
name: "text",
762776
input: &openai.ChatCompletionRequest{

internal/extproc/translator/openai_awsbedrock.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,10 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) RequestBody(_ []byte, ope
7878

7979
bedrockReq.InferenceConfig.MaxTokens = cmp.Or(openAIReq.MaxCompletionTokens, openAIReq.MaxTokens)
8080

81-
stopSequence, err := processStop(openAIReq.Stop)
82-
if err != nil {
83-
return
84-
}
85-
if len(stopSequence) > 0 {
86-
bedrockReq.InferenceConfig.StopSequences = stopSequence
81+
if openAIReq.Stop.OfString.Valid() {
82+
bedrockReq.InferenceConfig.StopSequences = []string{openAIReq.Stop.OfString.String()}
83+
} else if openAIReq.Stop.OfStringArray != nil {
84+
bedrockReq.InferenceConfig.StopSequences = openAIReq.Stop.OfStringArray
8785
}
8886

8987
// Handle Anthropic vendor fields if present. Currently only supports thinking fields.

internal/extproc/translator/openai_awsbedrock_test.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream"
2020
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
2121
"github.com/google/go-cmp/cmp"
22+
openaigo "github.com/openai/openai-go/v2"
2223
"github.com/stretchr/testify/require"
2324
"k8s.io/utils/ptr"
2425

@@ -712,11 +713,47 @@ func TestOpenAIToAWSBedrockTranslatorV1ChatCompletion_RequestBody(t *testing.T)
712713
},
713714
},
714715
},
715-
Stop: []*string{ptr.To("stop_only")},
716+
Stop: openaigo.ChatCompletionNewParamsStopUnion{
717+
OfString: openaigo.Opt[string]("stop_only"),
718+
},
719+
},
720+
output: awsbedrock.ConverseInput{
721+
InferenceConfig: &awsbedrock.InferenceConfiguration{
722+
StopSequences: []string{"stop_only"},
723+
},
724+
Messages: []*awsbedrock.Message{
725+
{
726+
Role: openai.ChatMessageRoleUser,
727+
Content: []*awsbedrock.ContentBlock{
728+
{
729+
Text: ptr.To("from-user"),
730+
},
731+
},
732+
},
733+
},
734+
},
735+
},
736+
{
737+
name: "test stop sequence",
738+
input: openai.ChatCompletionRequest{
739+
Model: "gpt-4o",
740+
Messages: []openai.ChatCompletionMessageParamUnion{
741+
{
742+
OfUser: &openai.ChatCompletionUserMessageParam{
743+
Content: openai.StringOrUserRoleContentUnion{
744+
Value: "from-user",
745+
},
746+
Role: openai.ChatMessageRoleUser,
747+
},
748+
},
749+
},
750+
Stop: openaigo.ChatCompletionNewParamsStopUnion{
751+
OfStringArray: []string{"stop1", "stop2"},
752+
},
716753
},
717754
output: awsbedrock.ConverseInput{
718755
InferenceConfig: &awsbedrock.InferenceConfiguration{
719-
StopSequences: []*string{ptr.To("stop_only")},
756+
StopSequences: []string{"stop1", "stop2"},
720757
},
721758
Messages: []*awsbedrock.Message{
722759
{

internal/extproc/translator/openai_gcpanthropic.go

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -515,20 +515,10 @@ func buildAnthropicParams(openAIReq *openai.ChatCompletionRequest) (params *anth
515515
if openAIReq.TopP != nil {
516516
params.TopP = anthropic.Float(*openAIReq.TopP)
517517
}
518-
519-
// Handle stop sequences.
520-
stopSequences, err := processStop(openAIReq.Stop)
521-
if err != nil {
522-
return nil, err
523-
}
524-
if len(stopSequences) > 0 {
525-
var stops []string
526-
for _, s := range stopSequences {
527-
if s != nil {
528-
stops = append(stops, *s)
529-
}
530-
}
531-
params.StopSequences = stops
518+
if openAIReq.Stop.OfString.Valid() {
519+
params.StopSequences = []string{openAIReq.Stop.OfString.String()}
520+
} else if openAIReq.Stop.OfStringArray != nil {
521+
params.StopSequences = openAIReq.Stop.OfStringArray
532522
}
533523

534524
// 5. Handle Vendor specific fields.

internal/extproc/translator/openai_gcpanthropic_test.go

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/anthropics/anthropic-sdk-go/shared/constant"
1919
anthropicVertex "github.com/anthropics/anthropic-sdk-go/vertex"
2020
"github.com/google/go-cmp/cmp"
21+
openaigo "github.com/openai/openai-go/v2"
2122
"github.com/stretchr/testify/require"
2223
"github.com/tidwall/gjson"
2324
"k8s.io/utils/ptr"
@@ -209,6 +210,44 @@ func TestOpenAIToGCPAnthropicTranslatorV1ChatCompletion_RequestBody(t *testing.T
209210
require.Equal(t, expectedPath, string(pathHeader[0].Header.RawValue))
210211
})
211212

213+
t.Run("Test message param", func(t *testing.T) {
214+
openaiRequest := &openai.ChatCompletionRequest{
215+
Model: claudeTestModel,
216+
Messages: []openai.ChatCompletionMessageParamUnion{},
217+
Temperature: ptr.To(0.1),
218+
MaxTokens: ptr.To(int64(100)),
219+
TopP: ptr.To(0.1),
220+
Stop: openaigo.ChatCompletionNewParamsStopUnion{
221+
OfStringArray: []string{"stop1", "stop2"},
222+
},
223+
}
224+
messageParam, err := buildAnthropicParams(openaiRequest)
225+
require.NoError(t, err)
226+
require.Equal(t, int64(100), messageParam.MaxTokens)
227+
require.Equal(t, "0.1", messageParam.TopP.String())
228+
require.Equal(t, "0.1", messageParam.Temperature.String())
229+
require.Equal(t, []string{"stop1", "stop2"}, messageParam.StopSequences)
230+
})
231+
232+
t.Run("Test single stop", func(t *testing.T) {
233+
openaiRequest := &openai.ChatCompletionRequest{
234+
Model: claudeTestModel,
235+
Messages: []openai.ChatCompletionMessageParamUnion{},
236+
Temperature: ptr.To(0.1),
237+
MaxTokens: ptr.To(int64(100)),
238+
TopP: ptr.To(0.1),
239+
Stop: openaigo.ChatCompletionNewParamsStopUnion{
240+
OfString: openaigo.Opt[string]("stop1"),
241+
},
242+
}
243+
messageParam, err := buildAnthropicParams(openaiRequest)
244+
require.NoError(t, err)
245+
require.Equal(t, int64(100), messageParam.MaxTokens)
246+
require.Equal(t, "0.1", messageParam.TopP.String())
247+
require.Equal(t, "0.1", messageParam.Temperature.String())
248+
require.Equal(t, []string{"stop1"}, messageParam.StopSequences)
249+
})
250+
212251
t.Run("Invalid Temperature (above bound)", func(t *testing.T) {
213252
invalidTempReq := &openai.ChatCompletionRequest{
214253
Model: claudeTestModel,
@@ -847,12 +886,6 @@ func TestHelperFunctions(t *testing.T) {
847886
require.Error(t, err)
848887
require.Contains(t, err.Error(), "invalid anthropic role")
849888
})
850-
851-
t.Run("process stop with nil", func(t *testing.T) {
852-
val, err := processStop(nil)
853-
require.NoError(t, err)
854-
require.Nil(t, val)
855-
})
856889
}
857890

858891
func TestTranslateOpenAItoAnthropicTools(t *testing.T) {

0 commit comments

Comments
 (0)