Skip to content

Commit cfefd00

Browse files
authored
optimize: use openai go sdk ChatCompletion replace map struct (#246)
Signed-off-by: yuluo-yx <[email protected]>
1 parent 164c391 commit cfefd00

File tree

4 files changed

+79
-74
lines changed

4 files changed

+79
-74
lines changed

src/semantic-router/pkg/extproc/caching_test.go

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
. "github.com/onsi/ginkgo/v2"
88
. "github.com/onsi/gomega"
9+
"github.com/openai/openai-go"
910

1011
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1112

@@ -83,18 +84,18 @@ var _ = Describe("Caching Functionality", func() {
8384
}
8485

8586
// Simulate response processing
86-
openAIResponse := map[string]interface{}{
87-
"choices": []map[string]interface{}{
87+
openAIResponse := openai.ChatCompletion{
88+
Choices: []openai.ChatCompletionChoice{
8889
{
89-
"message": map[string]interface{}{
90-
"content": "Cached response",
90+
Message: openai.ChatCompletionMessage{
91+
Content: "Cached response.",
9192
},
9293
},
9394
},
94-
"usage": map[string]interface{}{
95-
"prompt_tokens": 10,
96-
"completion_tokens": 5,
97-
"total_tokens": 15,
95+
Usage: openai.CompletionUsage{
96+
PromptTokens: 10,
97+
CompletionTokens: 5,
98+
TotalTokens: 15,
9899
},
99100
}
100101

@@ -142,18 +143,18 @@ var _ = Describe("Caching Functionality", func() {
142143
Expect(err).To(Or(BeNil(), HaveOccurred()))
143144

144145
// Process response
145-
openAIResponse := map[string]interface{}{
146-
"choices": []map[string]interface{}{
146+
openAIResponse := openai.ChatCompletion{
147+
Choices: []openai.ChatCompletionChoice{
147148
{
148-
"message": map[string]interface{}{
149-
"content": "Machine learning is a subset of artificial intelligence...",
149+
Message: openai.ChatCompletionMessage{
150+
Content: "Machine learning is a subset of artificial intelligence...",
150151
},
151152
},
152153
},
153-
"usage": map[string]interface{}{
154-
"prompt_tokens": 20,
155-
"completion_tokens": 30,
156-
"total_tokens": 50,
154+
Usage: openai.CompletionUsage{
155+
PromptTokens: 20,
156+
CompletionTokens: 30,
157+
TotalTokens: 50,
157158
},
158159
}
159160

src/semantic-router/pkg/extproc/metrics_integration_test.go

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
. "github.com/onsi/ginkgo/v2"
88
. "github.com/onsi/gomega"
9+
"github.com/openai/openai-go"
910

1011
core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
1112
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
@@ -86,23 +87,24 @@ var _ = Describe("Metrics recording", func() {
8687
beforePrompt := getHistogramSampleCount("llm_prompt_tokens_per_request", ctx.RequestModel)
8788
beforeCompletion := getHistogramSampleCount("llm_completion_tokens_per_request", ctx.RequestModel)
8889

89-
openAIResponse := map[string]interface{}{
90-
"id": "chatcmpl-xyz",
91-
"object": "chat.completion",
92-
"created": time.Now().Unix(),
93-
"model": ctx.RequestModel,
94-
"usage": map[string]interface{}{
95-
"prompt_tokens": 10,
96-
"completion_tokens": 5,
97-
"total_tokens": 15,
90+
openAIResponse := openai.ChatCompletion{
91+
ID: "chatcmpl-xyz",
92+
Object: "chat.completion",
93+
Created: time.Now().Unix(),
94+
Model: ctx.RequestModel,
95+
Usage: openai.CompletionUsage{
96+
PromptTokens: 10,
97+
CompletionTokens: 5,
98+
TotalTokens: 15,
9899
},
99-
"choices": []map[string]interface{}{
100+
Choices: []openai.ChatCompletionChoice{
100101
{
101-
"message": map[string]interface{}{"role": "assistant", "content": "Hello"},
102-
"finish_reason": "stop",
102+
Message: openai.ChatCompletionMessage{Role: "assistant", Content: "Hello"},
103+
FinishReason: "stop",
103104
},
104105
},
105106
}
107+
106108
respBodyJSON, err := json.Marshal(openAIResponse)
107109
Expect(err).NotTo(HaveOccurred())
108110

src/semantic-router/pkg/extproc/request_processing_test.go

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
. "github.com/onsi/ginkgo/v2"
88
. "github.com/onsi/gomega"
9+
"github.com/openai/openai-go"
910

1011
core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
1112
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
@@ -360,23 +361,24 @@ var _ = Describe("Request Processing", func() {
360361

361362
Describe("handleResponseBody", func() {
362363
It("should process response body with token parsing", func() {
363-
openAIResponse := map[string]interface{}{
364-
"id": "chatcmpl-123",
365-
"object": "chat.completion",
366-
"created": time.Now().Unix(),
367-
"model": "model-a",
368-
"usage": map[string]interface{}{
369-
"prompt_tokens": 150,
370-
"completion_tokens": 50,
371-
"total_tokens": 200,
364+
365+
openAIResponse := openai.ChatCompletion{
366+
ID: "chatcmpl-123",
367+
Object: "chat.completion",
368+
Created: time.Now().Unix(),
369+
Model: "model-a",
370+
Usage: openai.CompletionUsage{
371+
PromptTokens: 150,
372+
CompletionTokens: 50,
373+
TotalTokens: 200,
372374
},
373-
"choices": []map[string]interface{}{
375+
Choices: []openai.ChatCompletionChoice{
374376
{
375-
"message": map[string]interface{}{
376-
"role": "assistant",
377-
"content": "This is a test response",
377+
Message: openai.ChatCompletionMessage{
378+
Role: "assistant",
379+
Content: "This is a test response",
378380
},
379-
"finish_reason": "stop",
381+
FinishReason: "stop",
380382
},
381383
},
382384
}

src/semantic-router/pkg/utils/http/response.go

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
99
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
1010
typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
11+
"github.com/openai/openai-go"
1112
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
1213
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
1314
)
@@ -18,26 +19,26 @@ func CreatePIIViolationResponse(model string, deniedPII []string) *ext_proc.Proc
1819
metrics.RecordPIIViolations(model, deniedPII)
1920

2021
// Create OpenAI-compatible response format for PII violations
21-
openAIResponse := map[string]interface{}{
22-
"id": fmt.Sprintf("chatcmpl-pii-violation-%d", time.Now().Unix()),
23-
"object": "chat.completion",
24-
"created": time.Now().Unix(),
25-
"model": model,
26-
"system_fingerprint": "router_pii_policy",
27-
"choices": []map[string]interface{}{
22+
unixTimeStep := time.Now().Unix()
23+
openAIResponse := openai.ChatCompletion{
24+
ID: fmt.Sprintf("chatcmpl-pii-violation-%d", unixTimeStep),
25+
Object: "chat.completion",
26+
Created: unixTimeStep,
27+
Model: model,
28+
Choices: []openai.ChatCompletionChoice{
2829
{
29-
"index": 0,
30-
"message": map[string]interface{}{
31-
"role": "assistant",
32-
"content": fmt.Sprintf("I cannot process this request as it contains personally identifiable information (%v) that is not allowed for the '%s' model according to the configured privacy policy. Please remove any sensitive information and try again.", deniedPII, model),
30+
Index: 0,
31+
Message: openai.ChatCompletionMessage{
32+
Role: "assistant",
33+
Content: fmt.Sprintf("I cannot process this request as it contains personally identifiable information (%v) that is not allowed for the '%s' model according to the configured privacy policy. Please remove any sensitive information and try again.", deniedPII, model),
3334
},
34-
"finish_reason": "content_filter",
35+
FinishReason: "content_filter",
3536
},
3637
},
37-
"usage": map[string]interface{}{
38-
"prompt_tokens": 0,
39-
"completion_tokens": 0,
40-
"total_tokens": 0,
38+
Usage: openai.CompletionUsage{
39+
PromptTokens: 0,
40+
CompletionTokens: 0,
41+
TotalTokens: 0,
4142
},
4243
}
4344

@@ -81,26 +82,25 @@ func CreatePIIViolationResponse(model string, deniedPII []string) *ext_proc.Proc
8182
// CreateJailbreakViolationResponse creates an HTTP response for jailbreak detection violations
8283
func CreateJailbreakViolationResponse(jailbreakType string, confidence float32) *ext_proc.ProcessingResponse {
8384
// Create OpenAI-compatible response format for jailbreak violations
84-
openAIResponse := map[string]interface{}{
85-
"id": fmt.Sprintf("chatcmpl-jailbreak-blocked-%d", time.Now().Unix()),
86-
"object": "chat.completion",
87-
"created": time.Now().Unix(),
88-
"model": "security-filter",
89-
"system_fingerprint": "router_prompt_guard",
90-
"choices": []map[string]interface{}{
85+
openAIResponse := openai.ChatCompletion{
86+
ID: fmt.Sprintf("chatcmpl-jailbreak-blocked-%d", time.Now().Unix()),
87+
Object: "chat.completion",
88+
Created: time.Now().Unix(),
89+
Model: "security-filter",
90+
Choices: []openai.ChatCompletionChoice{
9191
{
92-
"index": 0,
93-
"message": map[string]interface{}{
94-
"role": "assistant",
95-
"content": fmt.Sprintf("I cannot process this request as it appears to contain a potential jailbreak attempt (type: %s, confidence: %.3f). Please rephrase your request in a way that complies with our usage policies.", jailbreakType, confidence),
92+
Index: 0,
93+
Message: openai.ChatCompletionMessage{
94+
Role: "assistant",
95+
Content: fmt.Sprintf("I cannot process this request as it appears to contain a potential jailbreak attempt (type: %s, confidence: %.3f). Please rephrase your request in a way that complies with our usage policies.", jailbreakType, confidence),
9696
},
97-
"finish_reason": "content_filter",
97+
FinishReason: "content_filter",
9898
},
9999
},
100-
"usage": map[string]interface{}{
101-
"prompt_tokens": 0,
102-
"completion_tokens": 0,
103-
"total_tokens": 0,
100+
Usage: openai.CompletionUsage{
101+
PromptTokens: 0,
102+
CompletionTokens: 0,
103+
TotalTokens: 0,
104104
},
105105
}
106106

0 commit comments

Comments (0)