Skip to content

Commit 01829da

Browse files
committed
feat(validation): add input length guard; expand models; handle truncation
- Bblslug::translate(): - Integrate TextLengthValidator right after filters to fail early when prepared input exceeds model capacity. - Validation/TextLengthValidator: - New validator using model limits (estimated_max_chars, max_tokens, max_output_tokens) with a 4 chars/token heuristic. - Configurable overhead buffer (default 2000) and fallback reserve (% of total tokens) when max_output_tokens unknown. - Returns detailed error with overage and guidance to split input or reduce output. - Models (resources/models.yaml): - OpenAI: add gpt-5 / gpt-5-mini / gpt-5-nano; set limits (max_tokens/max_output_tokens/estimated_max_chars); expose reasoning tokens via usage.completion_tokens_details.reasoning_tokens; add limits for gpt-4o / gpt-4o-mini; restore explicit limits for gpt-4 / gpt-4-turbo. - Google: add usage breakdown for thoughts; keep 2.0-flash defined and list after 2.5 family. - X.ai: normalize usage breakdown keys; set explicit limits for grok-4 / grok-3 / grok-3-mini. - AnthropicDriver: - Detect `finish_reason=length` and fail fast with a clear error message; extract raw content early. - README: - Sync supported model list with registry, including OpenAI GPT-5 family and Gemini ordering.
1 parent 44fa820 commit 01829da

File tree

5 files changed

+184
-45
lines changed

5 files changed

+184
-45
lines changed

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,26 @@ APIs supported:
1414
- `deepl:free` - DeepL free tier
1515
- `deepl:pro` - DeepL pro tier
1616
- Google (Gemini):
17-
- `google:gemini-2.0-flash` - Gemini 2.0 Flash
1817
- `google:gemini-2.5-flash` - Gemini 2.5 Flash
1918
- `google:gemini-2.5-flash-lite` - Gemini 2.5 Flash Lite
2019
- `google:gemini-2.5-pro` - Gemini 2.5 Pro
20+
- `google:gemini-2.0-flash` - Gemini 2.0 Flash
2121
- OpenAI (GPT):
22-
- `openai:gpt-4` - OpenAI GPT-4
23-
- `openai:gpt-4-turbo` - OpenAI GPT-4 Turbo
22+
- `openai:gpt-5` - OpenAI GPT-5
23+
- `openai:gpt-5-mini` - OpenAI GPT-5 Mini
24+
- `openai:gpt-5-nano` - OpenAI GPT-5 Nano
2425
- `openai:gpt-4o` - OpenAI GPT-4o
2526
- `openai:gpt-4o-mini` - OpenAI GPT-4o Mini
27+
- `openai:gpt-4` - OpenAI GPT-4
28+
- `openai:gpt-4-turbo` - OpenAI GPT-4 Turbo
2629
- Yandex:
2730
- `yandex:gpt-lite` - YandexGPT Lite
2831
- `yandex:gpt-pro` - YandexGPT Pro
2932
- `yandex:gpt-32k` - YandexGPT Pro 32K
3033
- X.ai:
3134
- `xai:grok-4` - Grok 4
3235
- `xai:grok-3` - Grok 3
33-
- `xai:grok-3-fast` - Grok 3 Fast
3436
- `xai:grok-3-mini` - Grok 3 Mini
35-
- `xai:grok-3-mini-fast` - Grok 3 Mini Fast
3637

3738
## Features
3839

resources/models.yaml

Lines changed: 80 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -119,19 +119,9 @@ google:
119119
breakdown:
120120
prompt: promptTokenCount
121121
candidates: candidatesTokenCount
122+
thoughts: thoughtsTokenCount
122123

123124
models:
124-
gemini-2.0-flash:
125-
name: 'Gemini 2.0 Flash'
126-
endpoint: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent'
127-
defaults:
128-
model: gemini-2.0-flash
129-
limits:
130-
max_tokens: 131072
131-
token_estimator: gpt
132-
estimated_max_chars: 524288
133-
notes: 'Low-latency Flash model, balanced cost and performance.'
134-
135125
gemini-2.5-flash:
136126
name: 'Gemini 2.5 Flash'
137127
endpoint: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent'
@@ -171,6 +161,17 @@ google:
171161
estimated_max_chars: 1048576
172162
notes: 'Top-tier Pro model for longest contexts and highest accuracy.'
173163

164+
gemini-2.0-flash:
165+
name: 'Gemini 2.0 Flash'
166+
endpoint: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent'
167+
defaults:
168+
model: gemini-2.0-flash
169+
limits:
170+
max_tokens: 131072
171+
token_estimator: gpt
172+
estimated_max_chars: 524288
173+
notes: 'Low-latency Flash model, balanced cost and performance.'
174+
174175

175176
# -------------------------------------------------------------------
176177
# OpenAI GPT
@@ -189,41 +190,87 @@ openai:
189190
headers:
190191
- 'Content-Type: application/json'
191192
limits:
192-
max_tokens: 128000
193193
token_estimator: gpt
194-
estimated_max_chars: 512000
195194
usage:
196195
tokens:
197196
total: total_tokens
198197
breakdown:
199198
prompt: prompt_tokens
200199
completion: completion_tokens
200+
reasoning: completion_tokens_details.reasoning_tokens
201201

202202
models:
203-
gpt-4:
204-
name: 'OpenAI GPT-4'
203+
gpt-5:
204+
name: 'OpenAI GPT-5'
205205
defaults:
206-
model: gpt-4
207-
notes: 'Classic GPT-4 model: highest reliability.'
206+
model: gpt-5
207+
temperature: 1
208+
limits:
209+
max_tokens: 400000
210+
max_output_tokens: 128000
211+
estimated_max_chars: 1600000 # 400k * 4
212+
notes: 'Flagship model with enhanced reasoning, multimodal capabilities, and extended context support.'
208213

209-
gpt-4-turbo:
210-
name: 'OpenAI GPT-4 Turbo'
214+
gpt-5-mini:
215+
name: 'OpenAI GPT-5 Mini'
211216
defaults:
212-
model: gpt-4-turbo
213-
notes: 'Fast & cost-effective GPT-4 quality.'
217+
model: gpt-5-mini
218+
temperature: 1
219+
limits:
220+
max_tokens: 400000
221+
max_output_tokens: 128000
222+
estimated_max_chars: 1600000
223+
notes: 'Compact, cost-efficient GPT-5 variant suitable for high-volume or latency-sensitive translation tasks.'
224+
225+
gpt-5-nano:
226+
name: 'OpenAI GPT-5 Nano'
227+
defaults:
228+
model: gpt-5-nano
229+
temperature: 1
230+
limits:
231+
max_tokens: 400000
232+
max_output_tokens: 128000
233+
estimated_max_chars: 1600000
234+
notes: 'Ultra-fast GPT-5 variant optimized for small-scale and real-time translation tasks.'
214235

215236
gpt-4o:
216237
name: 'OpenAI GPT-4o'
217238
defaults:
218239
model: gpt-4o
240+
limits:
241+
max_tokens: 128000
242+
max_output_tokens: 16384
243+
estimated_max_chars: 512000
219244
notes: 'Highly accurate with flexible prompts, ideal for AI-assisted adaptive translation.'
220245

221246
gpt-4o-mini:
222247
name: 'OpenAI GPT-4o Mini'
223248
defaults:
224249
model: gpt-4o-mini
250+
limits:
251+
max_tokens: 128000
252+
max_output_tokens: 16384
253+
estimated_max_chars: 512000
225254
notes: 'Lightweight GPT-4o: lower latency/cost.'
226255

256+
gpt-4:
257+
name: 'OpenAI GPT-4'
258+
defaults:
259+
model: gpt-4
260+
limits:
261+
max_tokens: 8192
262+
estimated_max_chars: 32768
263+
notes: 'Classic GPT-4 model: highest reliability.'
264+
265+
gpt-4-turbo:
266+
name: 'OpenAI GPT-4 Turbo'
267+
defaults:
268+
model: gpt-4-turbo
269+
limits:
270+
max_tokens: 128000
271+
estimated_max_chars: 512000
272+
notes: 'Fast & cost-effective GPT-4 quality.'
273+
227274

228275
# -------------------------------------------------------------------
229276
# Yandex Foundation Models
@@ -313,44 +360,39 @@ xai:
313360
headers:
314361
- 'Content-Type: application/json'
315362
limits:
316-
max_tokens: 262144
317363
token_estimator: gpt
318-
estimated_max_chars: 1048576
319364
usage:
320365
tokens:
321-
total: total_tokens
366+
total: total_tokens
322367
breakdown:
323-
prompt: prompt_tokens
324-
completion: completion_tokens
368+
prompt: prompt_tokens
369+
completion: completion_tokens
325370
reasoning: completion_tokens_details.reasoning_tokens
326371

327372
models:
328373
grok-4:
329374
name: 'Grok 4'
330375
defaults:
331376
model: grok-4
377+
limits:
378+
max_tokens: 256000
379+
estimated_max_chars: 1024000
332380
notes: 'Scientist-grade reasoning, coding mode, and real-time internet understanding'
333381

334382
grok-3:
335383
name: 'Grok 3'
336384
defaults:
337385
model: grok-3
386+
limits:
387+
max_tokens: 131072
388+
estimated_max_chars: 524288
338389
notes: 'Optimized for logical reasoning, math problem-solving, and real-time data with DeepSearch'
339390

340-
grok-3-fast:
341-
name: 'Grok 3 Fast'
342-
defaults:
343-
model: grok-3-fast
344-
notes: 'Optimized for fastest Grok 3 inference'
345-
346391
grok-3-mini:
347392
name: 'Grok 3 Mini'
348393
defaults:
349394
model: grok-3-mini
395+
limits:
396+
max_tokens: 131072
397+
estimated_max_chars: 524288
350398
notes: 'Compact variant balancing Grok 3 performance and efficiency'
351-
352-
grok-3-mini-fast:
353-
name: 'Grok 3 Mini Fast'
354-
defaults:
355-
model: grok-3-mini-fast
356-
notes: 'Mini variant optimized for fastest inference with compact footprint'

src/Bblslug/Bblslug.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use Bblslug\Validation\HtmlValidator;
1111
use Bblslug\Validation\JsonValidator;
1212
use Bblslug\Validation\Schema;
13+
use Bblslug\Validation\TextLengthValidator;
1314

1415
class Bblslug
1516
{
@@ -167,6 +168,15 @@ public static function translate(
167168
$prepared = $filterManager->apply($text);
168169
$preparedLength = mb_strlen($prepared);
169170

171+
// Length guard: make sure prepared text fits model constraints
172+
$lengthValidator = TextLengthValidator::fromModelConfig($model);
173+
$lenResult = $lengthValidator->validate($prepared);
174+
if (! $lenResult->isValid()) {
175+
throw new \RuntimeException(
176+
"Input length exceeds model limits: " . implode('; ', $lenResult->getErrors())
177+
);
178+
}
179+
170180
// Prepare options for driver, merging in any CLI-provided variables
171181
$options = array_merge(
172182
[

src/Bblslug/Models/Drivers/AnthropicDriver.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,21 @@ public function parseResponse(array $config, string $responseBody): array
115115
throw new \RuntimeException("Anthropic API error: {$message}");
116116
}
117117

118+
// Extract raw content early (may be partial when truncated)
119+
$contentRaw = $data['choices'][0]['message']['content'] ?? '';
120+
$contentRaw = is_string($contentRaw) ? $contentRaw : '';
121+
122+
// If Anthropic cut output by tokens, fail with a clear message before marker search
123+
$finishReason = $data['choices'][0]['finish_reason'] ?? null;
124+
if ($finishReason === 'length') {
125+
throw new \RuntimeException(
126+
"Anthropic: translation was truncated (finish_reason=length) — increase max_tokens or split input. "
127+
);
128+
}
129+
118130
// Validate content
119-
$content = $data['choices'][0]['message']['content'] ?? null;
120-
if (!is_string($content)) {
131+
$content = $contentRaw;
132+
if ($content === '') {
121133
throw new \RuntimeException("Anthropic translation failed: {$responseBody}");
122134
}
123135

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
<?php

declare(strict_types=1);

namespace Bblslug\Validation;

/**
 * Guards prepared input text against a model's context-size limits.
 *
 * The effective character cap is derived either directly from the model's
 * estimated_max_chars, or from its token limits via a ~4 chars/token
 * heuristic, minus a fixed character buffer reserved for prompt
 * scaffolding and filter markers.
 */
class TextLengthValidator implements ValidatorInterface
{
    // Effective input cap in characters; overhead is already subtracted.
    // A value of 0 means "unknown limit" and disables the check.
    private int $limitChars;

    // Safety buffer reserved for prompts/markers; kept for error reporting.
    private int $overheadChars;

    /**
     * @param int $limitChars    Hard cap for prepared input length (in chars)
     * @param int $overheadChars Safety buffer to account for prompts/markers/etc.
     */
    public function __construct(int $limitChars, int $overheadChars = 2000)
    {
        // Clamp at 0: a non-positive resulting limit disables validation
        // rather than rejecting all input (see validate()).
        $this->limitChars = max(0, $limitChars - max(0, $overheadChars));
        $this->overheadChars = $overheadChars;
    }

    /**
     * Validate that the prepared text fits within the effective limit.
     *
     * A zero limit means the model's capacity is unknown; such input is
     * always accepted.
     */
    public function validate(string $content): ValidationResult
    {
        // mb_strlen: limits are in characters, not bytes.
        $len = mb_strlen($content);

        if ($this->limitChars > 0 && $len > $this->limitChars) {
            $excess = $len - $this->limitChars;

            // Note: the reported limit is the post-overhead cap, so the
            // message says the overhead has already been reserved.
            return ValidationResult::failure([
                sprintf(
                    'Prepared text length %d exceeds limit %d by %d chars ' .
                    '(after reserving %d chars of overhead). ' .
                    'Split input or reduce max output tokens.',
                    $len,
                    $this->limitChars,
                    $excess,
                    $this->overheadChars
                )
            ]);
        }

        return ValidationResult::success();
    }

    /**
     * Build validator from model config.
     * Uses estimated_max_chars, max_tokens and (if present) max_output_tokens.
     *
     * @param array<string,mixed> $model
     * @param int $fallbackReservePct Reserve percent when max_output_tokens unknown
     *                                (clamped to 0..100 so an out-of-range value
     *                                cannot silently disable validation).
     * @param int $overheadChars      Prompt/markers safety buffer (e.g. 2000).
     */
    public static function fromModelConfig(array $model, int $fallbackReservePct = 20, int $overheadChars = 2000): self
    {
        $limits = $model['limits'] ?? [];

        $estimatedMaxChars = (int)($limits['estimated_max_chars'] ?? 0);
        $maxTokens = (int)($limits['max_tokens'] ?? 0);
        $maxOutTokens = (int)($limits['max_output_tokens'] ?? 0);

        // Prefer a token-based calculation if we know the total budget.
        if ($maxTokens > 0) {
            // Reserve room for the model's output: its declared
            // max_output_tokens when known, otherwise a percentage of the
            // total token budget.
            $reservePct = min(100, max(0, $fallbackReservePct));
            $reservedOut = $maxOutTokens > 0
                ? $maxOutTokens
                : (int)max(1, floor($maxTokens * ($reservePct / 100)));

            $inputTokenBudget = max(0, $maxTokens - $reservedOut);
            $charsByTokens = $inputTokenBudget * 4; // ≈ 4 chars/token heuristic

            // When both estimates exist, honour the more conservative one.
            $limitChars = $estimatedMaxChars > 0
                ? min($estimatedMaxChars, $charsByTokens)
                : $charsByTokens;
        } else {
            // No token info — rely only on estimated_max_chars
            // (0 leaves validation disabled).
            $limitChars = $estimatedMaxChars;
        }

        return new self($limitChars, $overheadChars);
    }
}

0 commit comments

Comments
 (0)