Skip to content

Commit 8db91eb

Browse files
authored
GPT-5.1 models "minimal" removed, add gpt-5.1-codex-max (#80)
1 parent d2879a3 commit 8db91eb

File tree

6 files changed

+88
-21
lines changed

6 files changed

+88
-21
lines changed

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,19 @@ curl http://127.0.0.1:8000/v1/chat/completions \
114114

115115
# Supported models
116116
- `gpt-5`
117+
- `gpt-5.1`
117118
- `gpt-5-codex`
119+
- `gpt-5.1-codex`
120+
- `gpt-5.1-codex-max`
121+
- `gpt-5.1-codex-mini`
118122
- `codex-mini`
119123

120124
# Customisation / Configuration
121125

122126
### Thinking effort
123127

124-
- `--reasoning-effort` (choice of minimal,low,medium,high)<br>
125-
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.
128+
- `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)<br>
129+
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`. The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high`, while `gpt-5.1-codex-max` adds `xhigh`; neither offers a `minimal` variant.
126130

127131
### Thinking summaries
128132

chatmock/cli.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def main() -> None:
311311
)
312312
p_serve.add_argument(
313313
"--reasoning-effort",
314-
choices=["minimal", "low", "medium", "high"],
314+
choices=["minimal", "low", "medium", "high", "xhigh"],
315315
default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
316316
help="Reasoning effort level for Responses API (default: medium)",
317317
)
@@ -335,8 +335,8 @@ def main() -> None:
335335
action="store_true",
336336
default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
337337
help=(
338-
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high) as separate models from /v1/models. "
339-
"This allows choosing effort via model selection in compatible UIs."
338+
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high|xhigh where supported) "
339+
"as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
340340
),
341341
)
342342
p_serve.add_argument(

chatmock/reasoning.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,34 @@
11
from __future__ import annotations
22

3-
from typing import Any, Dict
3+
from typing import Any, Dict, Set
4+
5+
6+
DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh"}
7+
8+
9+
def allowed_efforts_for_model(model: str | None) -> Set[str]:
10+
base = (model or "").strip().lower()
11+
if not base:
12+
return DEFAULT_REASONING_EFFORTS
13+
normalized = base.split(":", 1)[0]
14+
if normalized.startswith("gpt-5.1-codex-max"):
15+
return {"low", "medium", "high", "xhigh"}
16+
if normalized.startswith("gpt-5.1"):
17+
return {"low", "medium", "high"}
18+
return DEFAULT_REASONING_EFFORTS
419

520

621
def build_reasoning_param(
7-
base_effort: str = "medium", base_summary: str = "auto", overrides: Dict[str, Any] | None = None
22+
base_effort: str = "medium",
23+
base_summary: str = "auto",
24+
overrides: Dict[str, Any] | None = None,
25+
*,
26+
allowed_efforts: Set[str] | None = None,
827
) -> Dict[str, Any]:
928
effort = (base_effort or "").strip().lower()
1029
summary = (base_summary or "").strip().lower()
1130

12-
valid_efforts = {"minimal", "low", "medium", "high"}
31+
valid_efforts = allowed_efforts or DEFAULT_REASONING_EFFORTS
1332
valid_summaries = {"auto", "concise", "detailed", "none"}
1433

1534
if isinstance(overrides, dict):
@@ -80,7 +99,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non
8099
s = model.strip().lower()
81100
if not s:
82101
return None
83-
efforts = {"minimal", "low", "medium", "high"}
102+
efforts = {"minimal", "low", "medium", "high", "xhigh"}
84103

85104
if ":" in s:
86105
maybe = s.rsplit(":", 1)[-1].strip()
@@ -96,5 +115,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non
96115
return {"effort": "medium"}
97116
if s.endswith(sep + "high"):
98117
return {"effort": "high"}
118+
if s.endswith(sep + "xhigh"):
119+
return {"effort": "xhigh"}
99120

100121
return None

chatmock/routes_ollama.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@
1010
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
1111
from .limits import record_rate_limits_from_response
1212
from .http import build_cors_headers
13-
from .reasoning import build_reasoning_param, extract_reasoning_from_model_name
13+
from .reasoning import (
14+
allowed_efforts_for_model,
15+
build_reasoning_param,
16+
extract_reasoning_from_model_name,
17+
)
1418
from .transform import convert_ollama_messages, normalize_ollama_tools
1519
from .upstream import normalize_model_name, start_upstream_request
1620
from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
@@ -67,7 +71,7 @@ def ollama_version() -> Response:
6771

6872
def _instructions_for_model(model: str) -> str:
6973
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
70-
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
74+
if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
7175
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
7276
if isinstance(codex, str) and codex.strip():
7377
return codex
@@ -89,7 +93,15 @@ def ollama_tags() -> Response:
8993
if bool(current_app.config.get("VERBOSE")):
9094
print("IN GET /api/tags")
9195
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
92-
model_ids = ["gpt-5", "gpt-5.1", "gpt-5-codex", "gpt-5.1-codex", "gpt-5.1-codex-mini", "codex-mini"]
96+
model_ids = [
97+
"gpt-5",
98+
"gpt-5.1",
99+
"gpt-5-codex",
100+
"gpt-5.1-codex",
101+
"gpt-5.1-codex-max",
102+
"gpt-5.1-codex-mini",
103+
"codex-mini",
104+
]
93105
if expose_variants:
94106
model_ids.extend(
95107
[
@@ -100,13 +112,16 @@ def ollama_tags() -> Response:
100112
"gpt-5.1-high",
101113
"gpt-5.1-medium",
102114
"gpt-5.1-low",
103-
"gpt-5.1-minimal",
104115
"gpt-5-codex-high",
105116
"gpt-5-codex-medium",
106117
"gpt-5-codex-low",
107118
"gpt-5.1-codex-high",
108119
"gpt-5.1-codex-medium",
109120
"gpt-5.1-codex-low",
121+
"gpt-5.1-codex-max-xhigh",
122+
"gpt-5.1-codex-max-high",
123+
"gpt-5.1-codex-max-medium",
124+
"gpt-5.1-codex-max-low",
110125
]
111126
)
112127
models = []
@@ -275,7 +290,12 @@ def ollama_chat() -> Response:
275290
tools=tools_responses,
276291
tool_choice=tool_choice,
277292
parallel_tool_calls=parallel_tool_calls,
278-
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
293+
reasoning_param=build_reasoning_param(
294+
reasoning_effort,
295+
reasoning_summary,
296+
model_reasoning,
297+
allowed_efforts=allowed_efforts_for_model(model),
298+
),
279299
)
280300
if error_resp is not None:
281301
if verbose:
@@ -310,7 +330,12 @@ def ollama_chat() -> Response:
310330
tools=base_tools_only,
311331
tool_choice=safe_choice,
312332
parallel_tool_calls=parallel_tool_calls,
313-
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
333+
reasoning_param=build_reasoning_param(
334+
reasoning_effort,
335+
reasoning_summary,
336+
model_reasoning,
337+
allowed_efforts=allowed_efforts_for_model(model),
338+
),
314339
)
315340
record_rate_limits_from_response(upstream2)
316341
if err2 is None and upstream2 is not None and upstream2.status_code < 400:

chatmock/routes_openai.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
1010
from .limits import record_rate_limits_from_response
1111
from .http import build_cors_headers
12-
from .reasoning import apply_reasoning_to_message, build_reasoning_param, extract_reasoning_from_model_name
12+
from .reasoning import (
13+
allowed_efforts_for_model,
14+
apply_reasoning_to_message,
15+
build_reasoning_param,
16+
extract_reasoning_from_model_name,
17+
)
1318
from .upstream import normalize_model_name, start_upstream_request
1419
from .utils import (
1520
convert_chat_messages_to_responses_input,
@@ -54,7 +59,7 @@ def _gen():
5459

5560
def _instructions_for_model(model: str) -> str:
5661
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
57-
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
62+
if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
5863
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
5964
if isinstance(codex, str) and codex.strip():
6065
return codex
@@ -166,7 +171,12 @@ def chat_completions() -> Response:
166171

167172
model_reasoning = extract_reasoning_from_model_name(requested_model)
168173
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
169-
reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
174+
reasoning_param = build_reasoning_param(
175+
reasoning_effort,
176+
reasoning_summary,
177+
reasoning_overrides,
178+
allowed_efforts=allowed_efforts_for_model(model),
179+
)
170180

171181
upstream, error_resp = start_upstream_request(
172182
model,
@@ -396,7 +406,12 @@ def completions() -> Response:
396406

397407
model_reasoning = extract_reasoning_from_model_name(requested_model)
398408
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
399-
reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
409+
reasoning_param = build_reasoning_param(
410+
reasoning_effort,
411+
reasoning_summary,
412+
reasoning_overrides,
413+
allowed_efforts=allowed_efforts_for_model(model),
414+
)
400415
upstream, error_resp = start_upstream_request(
401416
model,
402417
input_items,
@@ -518,9 +533,10 @@ def list_models() -> Response:
518533
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
519534
model_groups = [
520535
("gpt-5", ["high", "medium", "low", "minimal"]),
521-
("gpt-5.1", ["high", "medium", "low", "minimal"]),
536+
("gpt-5.1", ["high", "medium", "low"]),
522537
("gpt-5-codex", ["high", "medium", "low"]),
523538
("gpt-5.1-codex", ["high", "medium", "low"]),
539+
("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
524540
("gpt-5.1-codex-mini", []),
525541
("codex-mini", []),
526542
]

chatmock/upstream.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
3232
base = name.split(":", 1)[0].strip()
3333
for sep in ("-", "_"):
3434
lowered = base.lower()
35-
for effort in ("minimal", "low", "medium", "high"):
35+
for effort in ("minimal", "low", "medium", "high", "xhigh"):
3636
suffix = f"{sep}{effort}"
3737
if lowered.endswith(suffix):
3838
base = base[: -len(suffix)]
@@ -46,6 +46,7 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
4646
"gpt-5-codex": "gpt-5-codex",
4747
"gpt-5-codex-latest": "gpt-5-codex",
4848
"gpt-5.1-codex": "gpt-5.1-codex",
49+
"gpt-5.1-codex-max": "gpt-5.1-codex-max",
4950
"codex": "codex-mini-latest",
5051
"codex-mini": "codex-mini-latest",
5152
"codex-mini-latest": "codex-mini-latest",

0 commit comments

Comments
 (0)