Skip to content

Commit d7c578e

Browse files
authored
Merge pull request #1 from vllm-project/reasoning-mode
feat: support auto-enable reasoning mode based on intention
2 parents f3728a5 + a9c21e2 commit d7c578e

File tree

11 files changed (+1031 / -300 lines changed)

config/config.yaml

Lines changed: 235 additions & 207 deletions
Large diffs are not rendered by default.

config/envoy.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static_resources:
3131
upstream_local_address: "%UPSTREAM_LOCAL_ADDRESS%"
3232
request_id: "%REQ(X-REQUEST-ID)%"
3333
selected_model: "%REQ(X-SELECTED-MODEL)%"
34-
selected_endpoint: "%REQ(X-GATEWAY-DESTINATION-ENDPOINT)%"
34+
selected_endpoint: "%REQ(X-SEMANTIC-DESTINATION-ENDPOINT)%"
3535
route_config:
3636
name: local_route
3737
virtual_hosts:
@@ -42,7 +42,7 @@ static_resources:
4242
- match:
4343
prefix: "/"
4444
headers:
45-
- name: "x-gateway-destination-endpoint"
45+
- name: "x-semantic-destination-endpoint"
4646
string_match:
4747
exact: "endpoint1"
4848
route:
@@ -51,7 +51,7 @@ static_resources:
5151
- match:
5252
prefix: "/"
5353
headers:
54-
- name: "x-gateway-destination-endpoint"
54+
- name: "x-semantic-destination-endpoint"
5555
string_match:
5656
exact: "endpoint2"
5757
route:
@@ -60,7 +60,7 @@ static_resources:
6060
- match:
6161
prefix: "/"
6262
headers:
63-
- name: "x-gateway-destination-endpoint"
63+
- name: "x-semantic-destination-endpoint"
6464
string_match:
6565
exact: "endpoint3"
6666
route:

src/semantic-router/pkg/config/config.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,11 @@ type ModelScore struct {
213213
}
214214

215215
type Category struct {
216-
Name string `yaml:"name"`
217-
Description string `yaml:"description,omitempty"`
218-
ModelScores []ModelScore `yaml:"model_scores"`
216+
Name string `yaml:"name"`
217+
Description string `yaml:"description,omitempty"`
218+
UseReasoning bool `yaml:"use_reasoning"`
219+
ReasoningDescription string `yaml:"reasoning_description,omitempty"`
220+
ModelScores []ModelScore `yaml:"model_scores"`
219221
}
220222

221223
var (

src/semantic-router/pkg/extproc/endpoint_selection_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ var _ = Describe("Endpoint Selection", func() {
7575
var modelHeaderFound bool
7676

7777
for _, header := range headerMutation.SetHeaders {
78-
if header.Header.Key == "x-gateway-destination-endpoint" {
78+
if header.Header.Key == "x-semantic-destination-endpoint" {
7979
endpointHeaderFound = true
8080
// Should be one of the configured endpoints
8181
Expect(header.Header.Value).To(BeElementOf("test-endpoint1", "test-endpoint2"))
@@ -139,7 +139,7 @@ var _ = Describe("Endpoint Selection", func() {
139139
var selectedEndpoint string
140140

141141
for _, header := range headerMutation.SetHeaders {
142-
if header.Header.Key == "x-gateway-destination-endpoint" {
142+
if header.Header.Key == "x-semantic-destination-endpoint" {
143143
endpointHeaderFound = true
144144
selectedEndpoint = header.Header.Value
145145
break
@@ -198,7 +198,7 @@ var _ = Describe("Endpoint Selection", func() {
198198
var selectedEndpoint string
199199

200200
for _, header := range headerMutation.SetHeaders {
201-
if header.Header.Key == "x-gateway-destination-endpoint" {
201+
if header.Header.Key == "x-semantic-destination-endpoint" {
202202
endpointHeaderFound = true
203203
selectedEndpoint = header.Header.Value
204204
break

src/semantic-router/pkg/extproc/processor.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,22 +52,30 @@ func (r *OpenAIRouter) Process(stream ext_proc.ExternalProcessor_ProcessServer)
5252

5353
switch v := req.Request.(type) {
5454
case *ext_proc.ProcessingRequest_RequestHeaders:
55+
log.Printf("DEBUG: Processing request headers")
5556
response, err := r.handleRequestHeaders(v, ctx)
5657
if err != nil {
58+
log.Printf("ERROR: handleRequestHeaders failed: %v", err)
5759
return err
5860
}
59-
if err := sendResponse(stream, response, "header"); err != nil {
61+
if err := sendResponse(stream, response, "request header"); err != nil {
62+
log.Printf("ERROR: sendResponse for headers failed: %v", err)
6063
return err
6164
}
65+
log.Printf("DEBUG: Request headers processed successfully")
6266

6367
case *ext_proc.ProcessingRequest_RequestBody:
68+
log.Printf("DEBUG: Processing request body - THIS IS WHERE ROUTING HAPPENS")
6469
response, err := r.handleRequestBody(v, ctx)
6570
if err != nil {
71+
log.Printf("ERROR: handleRequestBody failed: %v", err)
6672
return err
6773
}
68-
if err := sendResponse(stream, response, "body"); err != nil {
74+
if err := sendResponse(stream, response, "request body"); err != nil {
75+
log.Printf("ERROR: sendResponse for body failed: %v", err)
6976
return err
7077
}
78+
log.Printf("DEBUG: Request body processed successfully")
7179

7280
case *ext_proc.ProcessingRequest_ResponseHeaders:
7381
response, err := r.handleResponseHeaders(v)
@@ -105,5 +113,7 @@ func (r *OpenAIRouter) Process(stream ext_proc.ExternalProcessor_ProcessServer)
105113
return err
106114
}
107115
}
116+
117+
log.Printf("DEBUG: Finished processing message, continuing to next...")
108118
}
109119
}

0 commit comments

Comments
 (0)