
Commit b774c48

response mapping init
Signed-off-by: JaredforReal <[email protected]>
1 parent ee6e87e commit b774c48

File tree

8 files changed: +1906 -62 lines changed


config/config.development.yaml

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
# Development Configuration Example with Stdout Tracing
# This configuration enables distributed tracing with stdout exporter
# for local development and debugging.

bert_model:
  model_id: models/all-MiniLM-L12-v2
  threshold: 0.6
  use_cpu: true

semantic_cache:
  enabled: true
  backend_type: "memory"
  similarity_threshold: 0.8
  max_entries: 100
  ttl_seconds: 600
  eviction_policy: "fifo"
  use_hnsw: true # Enable HNSW for faster search
  hnsw_m: 16
  hnsw_ef_construction: 200

tools:
  enabled: false
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true

prompt_guard:
  enabled: false

vllm_endpoints:
  - name: "local-endpoint"
    address: "127.0.0.1"
    port: 8000
    weight: 1

model_config:
  "test-model":
    pii_policy:
      allow_by_default: true

classifier:
  category_model:
    model_id: "models/category_classifier_modernbert-base_model"
    use_modernbert: true
    threshold: 0.6
    use_cpu: true
    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

categories:
  - name: test
    system_prompt: "You are a test assistant."
    # Example: Category-level cache settings
    # semantic_cache_enabled: true
    # semantic_cache_similarity_threshold: 0.85
    model_scores:
      - model: test-model
        score: 1.0
        use_reasoning: false

default_model: test-model

# Enable OpenAI Responses API adapter (experimental)
enable_responses_adapter: true

# Auto model name for automatic model selection (optional)
# Uncomment and set to customize the model name for automatic routing
# auto_model_name: "MoM"

api:
  batch_classification:
    max_batch_size: 10
    metrics:
      enabled: true

# Observability Configuration - Development with Stdout
observability:
  tracing:
    # Enable tracing for development/debugging
    enabled: true

    # OpenTelemetry provider
    provider: "opentelemetry"

    exporter:
      # Stdout exporter prints traces to console (great for debugging)
      type: "stdout"

      # No endpoint needed for stdout
      # endpoint: ""
      # insecure: true

    sampling:
      # Always sample in development to see all traces
      type: "always_on"

      # Rate not used for always_on
      # rate: 1.0

    resource:
      # Service name for trace identification
      service_name: "vllm-semantic-router-dev"

      # Version for development
      service_version: "dev"

      # Environment identifier
      deployment_environment: "development"

config/config.yaml

Lines changed: 3 additions & 0 deletions
@@ -480,6 +480,9 @@ reasoning_families:
 # Global default reasoning effort level
 default_reasoning_effort: high
 
+# Enable OpenAI Responses API adapter (experimental)
+enable_responses_adapter: false
+
 # API Configuration
 api:
   batch_classification:

src/semantic-router/pkg/config/config.go

Lines changed: 6 additions & 0 deletions
@@ -247,6 +247,12 @@ type EmbeddingRule struct {
 	SimilarityThreshold float32 `yaml:"threshold"`
 	Candidates []string `yaml:"candidates"` // Renamed from Keywords
 	AggregationMethodConfiged AggregationMethod `yaml:"aggregation_method"`
+	// Gateway route cache clearing
+	ClearRouteCache bool `yaml:"clear_route_cache"`
+
+	// EnableResponsesAdapter enables the compatibility shim for OpenAI Responses API (/v1/responses)
+	// When enabled, POST /v1/responses requests are adapted to legacy /v1/chat/completions.
+	EnableResponsesAdapter bool `yaml:"enable_responses_adapter"`
 }
 
 // APIConfig represents configuration for API endpoints
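
For reference, a minimal deserialization sketch showing how the two new yaml tags bind when loaded with gopkg.in/yaml.v3. The adapterFlags struct and the standalone program are hypothetical; only the field names and yaml tags come from this diff.

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// adapterFlags is a hypothetical stand-in for the config struct above;
// the yaml tags match the two fields added in this commit.
type adapterFlags struct {
	ClearRouteCache        bool `yaml:"clear_route_cache"`
	EnableResponsesAdapter bool `yaml:"enable_responses_adapter"`
}

func main() {
	data := []byte("enable_responses_adapter: true\nclear_route_cache: false\n")
	var f adapterFlags
	if err := yaml.Unmarshal(data, &f); err != nil {
		panic(err)
	}
	fmt.Printf("adapter enabled: %v\n", f.EnableResponsesAdapter) // adapter enabled: true
}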
Lines changed: 146 additions & 0 deletions
@@ -0,0 +1,146 @@
package extproc

import (
	"encoding/json"
	"fmt"
	"strings"
)

// mapResponsesRequestToChatCompletions converts a minimal OpenAI Responses API request
// into a legacy Chat Completions request JSON. Supports only text input for PR1.
func mapResponsesRequestToChatCompletions(original []byte) ([]byte, error) {
	var req map[string]interface{}
	if err := json.Unmarshal(original, &req); err != nil {
		return nil, err
	}

	// Extract model
	model, _ := req["model"].(string)
	if model == "" {
		return nil, fmt.Errorf("missing model")
	}

	// Derive user content
	var userContent string
	if input, ok := req["input"]; ok {
		switch v := input.(type) {
		case string:
			userContent = v
		case []interface{}:
			// Join any string elements; ignore non-string for now
			var parts []string
			for _, it := range v {
				if s, ok := it.(string); ok {
					parts = append(parts, s)
				} else if m, ok := it.(map[string]interface{}); ok {
					// Try common shapes: {type:"input_text"|"text", text:"..."}
					if t, _ := m["type"].(string); t == "input_text" || t == "text" {
						if txt, _ := m["text"].(string); txt != "" {
							parts = append(parts, txt)
						}
					}
				}
			}
			userContent = strings.TrimSpace(strings.Join(parts, " "))
		default:
			// unsupported multimodal
			return nil, fmt.Errorf("unsupported input type")
		}
	} else if msgs, ok := req["messages"].([]interface{}); ok {
		// Fallback: if caller already provided messages, pass them through
		// This enables easy migration from chat/completions
		mapped := map[string]interface{}{
			"model":    model,
			"messages": msgs,
		}
		// Map basic params
		if v, ok := req["temperature"]; ok {
			mapped["temperature"] = v
		}
		if v, ok := req["top_p"]; ok {
			mapped["top_p"] = v
		}
		if v, ok := req["max_output_tokens"]; ok {
			mapped["max_tokens"] = v
		}
		return json.Marshal(mapped)
	}

	if userContent == "" {
		return nil, fmt.Errorf("empty input")
	}

	// Build minimal Chat Completions request
	mapped := map[string]interface{}{
		"model": model,
		"messages": []map[string]interface{}{
			{"role": "user", "content": userContent},
		},
	}
	// Map basic params
	if v, ok := req["temperature"]; ok {
		mapped["temperature"] = v
	}
	if v, ok := req["top_p"]; ok {
		mapped["top_p"] = v
	}
	if v, ok := req["max_output_tokens"]; ok {
		mapped["max_tokens"] = v
	}

	return json.Marshal(mapped)
}

// mapChatCompletionToResponses converts an OpenAI ChatCompletion JSON
// into a minimal Responses API JSON (non-streaming only) for PR1.
func mapChatCompletionToResponses(chatCompletionJSON []byte) ([]byte, error) {
	var parsed struct {
		ID      string `json:"id"`
		Object  string `json:"object"`
		Created int64  `json:"created"`
		Model   string `json:"model"`
		Choices []struct {
			Index        int    `json:"index"`
			FinishReason string `json:"finish_reason"`
			Message      struct {
				Role    string `json:"role"`
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
		Usage struct {
			PromptTokens     int `json:"prompt_tokens"`
			CompletionTokens int `json:"completion_tokens"`
			TotalTokens      int `json:"total_tokens"`
		} `json:"usage"`
	}
	if err := json.Unmarshal(chatCompletionJSON, &parsed); err != nil {
		return nil, err
	}

	content := ""
	stopReason := "stop"
	if len(parsed.Choices) > 0 {
		content = parsed.Choices[0].Message.Content
		if parsed.Choices[0].FinishReason != "" {
			stopReason = parsed.Choices[0].FinishReason
		}
	}

	out := map[string]interface{}{
		"id":      parsed.ID,
		"object":  "response",
		"created": parsed.Created,
		"model":   parsed.Model,
		"output": []map[string]interface{}{
			{"type": "message", "role": "assistant", "content": content},
		},
		"stop_reason": stopReason,
		"usage": map[string]int{
			"input_tokens":  parsed.Usage.PromptTokens,
			"output_tokens": parsed.Usage.CompletionTokens,
			"total_tokens":  parsed.Usage.TotalTokens,
		},
	}

	return json.Marshal(out)
}
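
The wiring between the config flag and the mapper is not part of this commit; below is a minimal sketch assuming only that the effective flag value and the request path are available at the interception point. adaptIfResponsesRequest is a hypothetical helper built on the mapper above.

// adaptIfResponsesRequest is a hypothetical helper (not in this commit)
// sketching how enable_responses_adapter could gate the request mapping:
// when the flag is set and the request targets /v1/responses, the body is
// rewritten into a legacy Chat Completions payload before routing continues.
func adaptIfResponsesRequest(enabled bool, path string, body []byte) ([]byte, bool, error) {
	if !enabled || path != "/v1/responses" {
		return body, false, nil // pass the body through unchanged
	}
	mapped, err := mapResponsesRequestToChatCompletions(body)
	if err != nil {
		return nil, false, err
	}
	return mapped, true, nil
}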
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
package extproc

import (
	"encoding/json"
	"testing"
)

func TestMapResponsesRequestToChatCompletions_TextInput(t *testing.T) {
	in := []byte(`{"model":"gpt-test","input":"Hello world","temperature":0.2,"top_p":0.9,"max_output_tokens":128}`)
	out, err := mapResponsesRequestToChatCompletions(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	if m["model"].(string) != "gpt-test" {
		t.Fatalf("model not mapped")
	}
	if _, ok := m["messages"].([]interface{}); !ok {
		t.Fatalf("messages missing")
	}
}

func TestMapChatCompletionToResponses_Minimal(t *testing.T) {
	in := []byte(`{
		"id":"chatcmpl-1","object":"chat.completion","created":123,"model":"gpt-test",
		"choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"hi"}}],
		"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}
	}`)
	out, err := mapChatCompletionToResponses(in)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	var m map[string]interface{}
	if err := json.Unmarshal(out, &m); err != nil {
		t.Fatalf("unmarshal mapped: %v", err)
	}
	if m["object"].(string) != "response" {
		t.Fatalf("object not 'response'")
	}
	if m["stop_reason"].(string) == "" {
		t.Fatalf("stop_reason missing")
	}
}
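
For completeness, a hypothetical round-trip test (not in this commit) that exercises both mappers in sequence, assuming it lives alongside the tests above:

// TestResponsesRoundTrip_Sketch is a hypothetical addition to the test file
// above: Responses request in, Chat Completions request out, then a
// Chat Completions response mapped back into a Responses response.
func TestResponsesRoundTrip_Sketch(t *testing.T) {
	chatReq, err := mapResponsesRequestToChatCompletions(
		[]byte(`{"model":"gpt-test","input":"Hello","max_output_tokens":64}`))
	if err != nil {
		t.Fatalf("request mapping: %v", err)
	}
	t.Logf("adapted request: %s", chatReq)

	respOut, err := mapChatCompletionToResponses(
		[]byte(`{"id":"chatcmpl-1","object":"chat.completion","created":123,"model":"gpt-test","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"hi"}}],"usage":{"prompt_tokens":1,"completion_tokens":1,"total_tokens":2}}`))
	if err != nil {
		t.Fatalf("response mapping: %v", err)
	}
	t.Logf("adapted response: %s", respOut)
}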
