Skip to content

Commit e207c39

Browse files
Merge branch 'router-for-me:main' into fix/codex-sse-error-forwarding
2 parents 93dd05e + ddcf1f2 commit e207c39

File tree

17 files changed

+1387
-301
lines changed

17 files changed

+1387
-301
lines changed

internal/registry/model_definitions_static_data.go

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,12 +208,27 @@ func GetGeminiModels() []*ModelInfo {
208208
Name: "models/gemini-3-flash-preview",
209209
Version: "3.0",
210210
DisplayName: "Gemini 3 Flash Preview",
211-
Description: "Gemini 3 Flash Preview",
211+
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
212212
InputTokenLimit: 1048576,
213213
OutputTokenLimit: 65536,
214214
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
215215
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
216216
},
217+
{
218+
ID: "gemini-3.1-flash-lite-preview",
219+
Object: "model",
220+
Created: 1776288000,
221+
OwnedBy: "google",
222+
Type: "gemini",
223+
Name: "models/gemini-3.1-flash-lite-preview",
224+
Version: "3.1",
225+
DisplayName: "Gemini 3.1 Flash Lite Preview",
226+
Description: "Our smallest and most cost effective model, built for at scale usage.",
227+
InputTokenLimit: 1048576,
228+
OutputTokenLimit: 65536,
229+
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
230+
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
231+
},
217232
{
218233
ID: "gemini-3-pro-image-preview",
219234
Object: "model",
@@ -324,6 +339,21 @@ func GetGeminiVertexModels() []*ModelInfo {
324339
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
325340
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
326341
},
342+
{
343+
ID: "gemini-3.1-flash-lite-preview",
344+
Object: "model",
345+
Created: 1776288000,
346+
OwnedBy: "google",
347+
Type: "gemini",
348+
Name: "models/gemini-3.1-flash-lite-preview",
349+
Version: "3.1",
350+
DisplayName: "Gemini 3.1 Flash Lite Preview",
351+
Description: "Our smallest and most cost effective model, built for at scale usage.",
352+
InputTokenLimit: 1048576,
353+
OutputTokenLimit: 65536,
354+
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
355+
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
356+
},
327357
{
328358
ID: "gemini-3-pro-image-preview",
329359
Object: "model",
@@ -496,6 +526,21 @@ func GetGeminiCLIModels() []*ModelInfo {
496526
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
497527
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
498528
},
529+
{
530+
ID: "gemini-3.1-flash-lite-preview",
531+
Object: "model",
532+
Created: 1776288000,
533+
OwnedBy: "google",
534+
Type: "gemini",
535+
Name: "models/gemini-3.1-flash-lite-preview",
536+
Version: "3.1",
537+
DisplayName: "Gemini 3.1 Flash Lite Preview",
538+
Description: "Our smallest and most cost effective model, built for at scale usage.",
539+
InputTokenLimit: 1048576,
540+
OutputTokenLimit: 65536,
541+
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
542+
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
543+
},
499544
}
500545
}
501546

@@ -592,6 +637,21 @@ func GetAIStudioModels() []*ModelInfo {
592637
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
593638
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
594639
},
640+
{
641+
ID: "gemini-3.1-flash-lite-preview",
642+
Object: "model",
643+
Created: 1776288000,
644+
OwnedBy: "google",
645+
Type: "gemini",
646+
Name: "models/gemini-3.1-flash-lite-preview",
647+
Version: "3.1",
648+
DisplayName: "Gemini 3.1 Flash Lite Preview",
649+
Description: "Our smallest and most cost effective model, built for at scale usage.",
650+
InputTokenLimit: 1048576,
651+
OutputTokenLimit: 65536,
652+
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
653+
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
654+
},
595655
{
596656
ID: "gemini-pro-latest",
597657
Object: "model",
@@ -968,6 +1028,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
9681028
"gemini-3.1-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
9691029
"gemini-3.1-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
9701030
"gemini-3.1-flash-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
1031+
"gemini-3.1-flash-lite-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
9711032
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
9721033
"claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
9731034
"claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},

internal/runtime/executor/codex_websockets_executor.go

Lines changed: 15 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
)
3232

3333
const (
34-
codexResponsesWebsocketBetaHeaderValue = "responses_websockets=2026-02-04"
34+
codexResponsesWebsocketBetaHeaderValue = "responses_websockets=2026-02-06"
3535
codexResponsesWebsocketIdleTimeout = 5 * time.Minute
3636
codexResponsesWebsocketHandshakeTO = 30 * time.Second
3737
)
@@ -57,11 +57,6 @@ type codexWebsocketSession struct {
5757
wsURL string
5858
authID string
5959

60-
// connCreateSent tracks whether a `response.create` message has been successfully sent
61-
// on the current websocket connection. The upstream expects the first message on each
62-
// connection to be `response.create`.
63-
connCreateSent bool
64-
6560
writeMu sync.Mutex
6661

6762
activeMu sync.Mutex
@@ -212,13 +207,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
212207
defer sess.reqMu.Unlock()
213208
}
214209

215-
allowAppend := true
216-
if sess != nil {
217-
sess.connMu.Lock()
218-
allowAppend = sess.connCreateSent
219-
sess.connMu.Unlock()
220-
}
221-
wsReqBody := buildCodexWebsocketRequestBody(body, allowAppend)
210+
wsReqBody := buildCodexWebsocketRequestBody(body)
222211
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
223212
URL: wsURL,
224213
Method: "WEBSOCKET",
@@ -280,10 +269,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
280269
// execution session.
281270
connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
282271
if errDialRetry == nil && connRetry != nil {
283-
sess.connMu.Lock()
284-
allowAppend = sess.connCreateSent
285-
sess.connMu.Unlock()
286-
wsReqBodyRetry := buildCodexWebsocketRequestBody(body, allowAppend)
272+
wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
287273
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
288274
URL: wsURL,
289275
Method: "WEBSOCKET",
@@ -312,7 +298,6 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
312298
return resp, errSend
313299
}
314300
}
315-
markCodexWebsocketCreateSent(sess, conn, wsReqBody)
316301

317302
for {
318303
if ctx != nil && ctx.Err() != nil {
@@ -403,26 +388,20 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
403388
wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey)
404389

405390
var authID, authLabel, authType, authValue string
406-
if auth != nil {
407-
authID = auth.ID
408-
authLabel = auth.Label
409-
authType, authValue = auth.AccountInfo()
410-
}
391+
authID = auth.ID
392+
authLabel = auth.Label
393+
authType, authValue = auth.AccountInfo()
411394

412395
executionSessionID := executionSessionIDFromOptions(opts)
413396
var sess *codexWebsocketSession
414397
if executionSessionID != "" {
415398
sess = e.getOrCreateSession(executionSessionID)
416-
sess.reqMu.Lock()
399+
if sess != nil {
400+
sess.reqMu.Lock()
401+
}
417402
}
418403

419-
allowAppend := true
420-
if sess != nil {
421-
sess.connMu.Lock()
422-
allowAppend = sess.connCreateSent
423-
sess.connMu.Unlock()
424-
}
425-
wsReqBody := buildCodexWebsocketRequestBody(body, allowAppend)
404+
wsReqBody := buildCodexWebsocketRequestBody(body)
426405
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
427406
URL: wsURL,
428407
Method: "WEBSOCKET",
@@ -483,10 +462,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
483462
sess.reqMu.Unlock()
484463
return nil, errDialRetry
485464
}
486-
sess.connMu.Lock()
487-
allowAppend = sess.connCreateSent
488-
sess.connMu.Unlock()
489-
wsReqBodyRetry := buildCodexWebsocketRequestBody(body, allowAppend)
465+
wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
490466
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
491467
URL: wsURL,
492468
Method: "WEBSOCKET",
@@ -515,7 +491,6 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
515491
return nil, errSend
516492
}
517493
}
518-
markCodexWebsocketCreateSent(sess, conn, wsReqBody)
519494

520495
out := make(chan cliproxyexecutor.StreamChunk)
521496
go func() {
@@ -657,31 +632,14 @@ func writeCodexWebsocketMessage(sess *codexWebsocketSession, conn *websocket.Con
657632
return conn.WriteMessage(websocket.TextMessage, payload)
658633
}
659634

660-
func buildCodexWebsocketRequestBody(body []byte, allowAppend bool) []byte {
635+
func buildCodexWebsocketRequestBody(body []byte) []byte {
661636
if len(body) == 0 {
662637
return nil
663638
}
664639

665-
// Codex CLI websocket v2 uses `response.create` with `previous_response_id` for incremental turns.
666-
// The upstream ChatGPT Codex websocket currently rejects that with close 1008 (policy violation).
667-
// Fall back to v1 `response.append` semantics on the same websocket connection to keep the session alive.
668-
//
669-
// NOTE: The upstream expects the first websocket event on each connection to be `response.create`,
670-
// so we only use `response.append` after we have initialized the current connection.
671-
if allowAppend {
672-
if prev := strings.TrimSpace(gjson.GetBytes(body, "previous_response_id").String()); prev != "" {
673-
inputNode := gjson.GetBytes(body, "input")
674-
wsReqBody := []byte(`{}`)
675-
wsReqBody, _ = sjson.SetBytes(wsReqBody, "type", "response.append")
676-
if inputNode.Exists() && inputNode.IsArray() && strings.TrimSpace(inputNode.Raw) != "" {
677-
wsReqBody, _ = sjson.SetRawBytes(wsReqBody, "input", []byte(inputNode.Raw))
678-
return wsReqBody
679-
}
680-
wsReqBody, _ = sjson.SetRawBytes(wsReqBody, "input", []byte("[]"))
681-
return wsReqBody
682-
}
683-
}
684-
640+
// Match codex-rs websocket v2 semantics: every request is `response.create`.
641+
// Incremental follow-up turns continue on the same websocket using
642+
// `previous_response_id` + incremental `input`, not `response.append`.
685643
wsReqBody, errSet := sjson.SetBytes(bytes.Clone(body), "type", "response.create")
686644
if errSet == nil && len(wsReqBody) > 0 {
687645
return wsReqBody
@@ -725,21 +683,6 @@ func readCodexWebsocketMessage(ctx context.Context, sess *codexWebsocketSession,
725683
}
726684
}
727685

728-
func markCodexWebsocketCreateSent(sess *codexWebsocketSession, conn *websocket.Conn, payload []byte) {
729-
if sess == nil || conn == nil || len(payload) == 0 {
730-
return
731-
}
732-
if strings.TrimSpace(gjson.GetBytes(payload, "type").String()) != "response.create" {
733-
return
734-
}
735-
736-
sess.connMu.Lock()
737-
if sess.conn == conn {
738-
sess.connCreateSent = true
739-
}
740-
sess.connMu.Unlock()
741-
}
742-
743686
func newProxyAwareWebsocketDialer(cfg *config.Config, auth *cliproxyauth.Auth) *websocket.Dialer {
744687
dialer := &websocket.Dialer{
745688
Proxy: http.ProxyFromEnvironment,
@@ -1017,36 +960,6 @@ func closeHTTPResponseBody(resp *http.Response, logPrefix string) {
1017960
}
1018961
}
1019962

1020-
func closeOnContextDone(ctx context.Context, conn *websocket.Conn) chan struct{} {
1021-
done := make(chan struct{})
1022-
if ctx == nil || conn == nil {
1023-
return done
1024-
}
1025-
go func() {
1026-
select {
1027-
case <-done:
1028-
case <-ctx.Done():
1029-
_ = conn.Close()
1030-
}
1031-
}()
1032-
return done
1033-
}
1034-
1035-
func cancelReadOnContextDone(ctx context.Context, conn *websocket.Conn) chan struct{} {
1036-
done := make(chan struct{})
1037-
if ctx == nil || conn == nil {
1038-
return done
1039-
}
1040-
go func() {
1041-
select {
1042-
case <-done:
1043-
case <-ctx.Done():
1044-
_ = conn.SetReadDeadline(time.Now())
1045-
}
1046-
}()
1047-
return done
1048-
}
1049-
1050963
func executionSessionIDFromOptions(opts cliproxyexecutor.Options) string {
1051964
if len(opts.Metadata) == 0 {
1052965
return ""
@@ -1120,7 +1033,6 @@ func (e *CodexWebsocketsExecutor) ensureUpstreamConn(ctx context.Context, auth *
11201033
sess.conn = conn
11211034
sess.wsURL = wsURL
11221035
sess.authID = authID
1123-
sess.connCreateSent = false
11241036
sess.readerConn = conn
11251037
sess.connMu.Unlock()
11261038

@@ -1206,7 +1118,6 @@ func (e *CodexWebsocketsExecutor) invalidateUpstreamConn(sess *codexWebsocketSes
12061118
return
12071119
}
12081120
sess.conn = nil
1209-
sess.connCreateSent = false
12101121
if sess.readerConn == conn {
12111122
sess.readerConn = nil
12121123
}
@@ -1273,7 +1184,6 @@ func (e *CodexWebsocketsExecutor) closeExecutionSession(sess *codexWebsocketSess
12731184
authID := sess.authID
12741185
wsURL := sess.wsURL
12751186
sess.conn = nil
1276-
sess.connCreateSent = false
12771187
if sess.readerConn == conn {
12781188
sess.readerConn = nil
12791189
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package executor
2+
3+
import (
4+
"context"
5+
"net/http"
6+
"testing"
7+
8+
"github.com/tidwall/gjson"
9+
)
10+
11+
func TestBuildCodexWebsocketRequestBodyPreservesPreviousResponseID(t *testing.T) {
12+
body := []byte(`{"model":"gpt-5-codex","previous_response_id":"resp-1","input":[{"type":"message","id":"msg-1"}]}`)
13+
14+
wsReqBody := buildCodexWebsocketRequestBody(body)
15+
16+
if got := gjson.GetBytes(wsReqBody, "type").String(); got != "response.create" {
17+
t.Fatalf("type = %s, want response.create", got)
18+
}
19+
if got := gjson.GetBytes(wsReqBody, "previous_response_id").String(); got != "resp-1" {
20+
t.Fatalf("previous_response_id = %s, want resp-1", got)
21+
}
22+
if gjson.GetBytes(wsReqBody, "input.0.id").String() != "msg-1" {
23+
t.Fatalf("input item id mismatch")
24+
}
25+
if got := gjson.GetBytes(wsReqBody, "type").String(); got == "response.append" {
26+
t.Fatalf("unexpected websocket request type: %s", got)
27+
}
28+
}
29+
30+
func TestApplyCodexWebsocketHeadersDefaultsToCurrentResponsesBeta(t *testing.T) {
31+
headers := applyCodexWebsocketHeaders(context.Background(), http.Header{}, nil, "")
32+
33+
if got := headers.Get("OpenAI-Beta"); got != codexResponsesWebsocketBetaHeaderValue {
34+
t.Fatalf("OpenAI-Beta = %s, want %s", got, codexResponsesWebsocketBetaHeaderValue)
35+
}
36+
}

0 commit comments

Comments
 (0)