@@ -9,7 +9,12 @@
 from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .limits import record_rate_limits_from_response
 from .http import build_cors_headers
-from .reasoning import apply_reasoning_to_message, build_reasoning_param, extract_reasoning_from_model_name
+from .reasoning import (
+    allowed_efforts_for_model,
+    apply_reasoning_to_message,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
 from .upstream import normalize_model_name, start_upstream_request
 from .utils import (
     convert_chat_messages_to_responses_input,
@@ -54,7 +59,7 @@ def _gen():
 
 def _instructions_for_model(model: str) -> str:
     base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
-    if model == "gpt-5-codex" or model == "gpt-5.1-codex":
+    if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
         codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
         if isinstance(codex, str) and codex.strip():
             return codex
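The equality checks become prefix matches, presumably so the Codex instruction set also covers effort-suffixed variants (e.g. `gpt-5-codex-high`) and longer family names such as `gpt-5.1-codex-max`. A minimal sketch of the resulting behavior; the helper name below is illustrative, not part of the diff:

```python
def _uses_codex_instructions(model: str) -> bool:
    # Prefix match: catches effort-suffixed variants ("gpt-5-codex-high")
    # as well as longer family names ("gpt-5.1-codex-max").
    return model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex")

assert _uses_codex_instructions("gpt-5-codex-high")
assert _uses_codex_instructions("gpt-5.1-codex-max")
assert not _uses_codex_instructions("gpt-5.1")
```

Note that the prefix match also captures `gpt-5.1-codex-mini`, which the old equality check excluded.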
@@ -166,7 +171,12 @@ def chat_completions() -> Response:
 
     model_reasoning = extract_reasoning_from_model_name(requested_model)
     reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
-    reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
 
     upstream, error_resp = start_upstream_request(
         model,
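`allowed_efforts_for_model` is imported and passed through here, but its body is not part of this diff. A plausible sketch, assuming it maps a normalized model name to the effort levels that family accepts, mirroring the per-family lists in `list_models` below:

```python
def allowed_efforts_for_model(model: str) -> tuple[str, ...]:
    # Hypothetical reimplementation for illustration. Order the prefix checks
    # from most to least specific so "gpt-5.1-codex-max" is not swallowed by
    # the "gpt-5.1" branch.
    if model.startswith("gpt-5.1-codex-max"):
        return ("xhigh", "high", "medium", "low")
    if model.startswith("gpt-5.1") or model.startswith("gpt-5-codex"):
        # gpt-5.1 dropped "minimal"; the codex families never listed it.
        return ("high", "medium", "low")
    if model.startswith("gpt-5"):
        return ("high", "medium", "low", "minimal")
    return ("high", "medium", "low")
```

With a keyword like this, `build_reasoning_param` can presumably clamp or drop an out-of-range effort (say, `xhigh` on plain `gpt-5.1`) instead of forwarding it upstream.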
@@ -396,7 +406,12 @@ def completions() -> Response:
 
     model_reasoning = extract_reasoning_from_model_name(requested_model)
     reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
-    reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
     upstream, error_resp = start_upstream_request(
         model,
         input_items,
@@ -518,9 +533,10 @@ def list_models() -> Response:
     expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
     model_groups = [
         ("gpt-5", ["high", "medium", "low", "minimal"]),
-        ("gpt-5.1", ["high", "medium", "low", "minimal"]),
+        ("gpt-5.1", ["high", "medium", "low"]),
         ("gpt-5-codex", ["high", "medium", "low"]),
         ("gpt-5.1-codex", ["high", "medium", "low"]),
+        ("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
         ("gpt-5.1-codex-mini", []),
         ("codex-mini", []),
     ]
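The new tuple gives `gpt-5.1-codex-max` an extra `xhigh` tier and removes `minimal` from `gpt-5.1`. Assuming `list_models` expands each group into suffixed variant IDs when `EXPOSE_REASONING_MODELS` is set (consistent with `extract_reasoning_from_model_name` parsing the suffix back out), the expansion would look roughly like:

```python
# Hypothetical expansion for illustration; model_groups and expose_variants
# stand in for the variables in the diff above.
expose_variants = True
model_groups = [("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"])]

model_ids = []
for base, efforts in model_groups:
    model_ids.append(base)
    if expose_variants:
        # Each effort becomes a suffixed pseudo-model ID.
        model_ids.extend(f"{base}-{effort}" for effort in efforts)

print(model_ids)
# ['gpt-5.1-codex-max', 'gpt-5.1-codex-max-xhigh', 'gpt-5.1-codex-max-high', ...]
```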