Skip to content

Commit 8fd1c33

Browse files
author
Dmitrii Shitikov
committed
Merge branch 'shitikov/translate2' into 'main'
structured_json format See merge request opensource/bblslug!1
2 parents ab5d63b + 5e78f67 commit 8fd1c33

File tree

11 files changed

+99
-30
lines changed

11 files changed

+99
-30
lines changed

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
}
1414
],
1515
"require": {
16-
"php": ">=8.0",
16+
"php": ">=8.1",
1717
"ext-curl": "*",
1818
"ext-mbstring": "*",
1919
"symfony/yaml": "^7.3"

resources/prompts.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,22 @@ translator:
7979
- There must be a blank line immediately before the {end} marker.
8080
- Return only the translated JSON, without commentary, Markdown fences, or extra text.
8181
{context}
82+
83+
structured_json: |
84+
You are a professional translator of JSON content.
85+
TASK:
86+
- Translate ONLY the string values of the provided JSON from {source} to {target}.
87+
- Never translate keys.
88+
RULES:
89+
- Do not add, remove, rename, or reorder any fields.
90+
- Each input object MUST produce exactly one output object.
91+
- Never split, duplicate, or create additional objects.
92+
- Do not translate URLs, domain names, filenames, code fragments, or variables.
93+
- Treat all blocks inside "body" and "preview" as parts of one continuous article.
94+
- Use full cross-block context when translating.
95+
- If a sentence spans multiple blocks, translate it coherently, but return the translation split into the same blocks and at the same boundaries as in the input.
96+
- Do not merge, reorder, or redistribute blocks.
97+
- Quoted text (including text inside quotation marks) is inline content and must remain inside the same text field.
98+
- Typographic quotation marks may be replaced with standard quotation marks used in the target language.
99+
- Never return an empty string unless the source string is empty.
100+
{context}

src/Bblslug/Bblslug.php

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace Bblslug;
44

5+
use Bblslug\Content\Transformer;
56
use Bblslug\Filters\FilterManager;
67
use Bblslug\HttpClient;
78
use Bblslug\Models\ModelRegistry;
@@ -83,7 +84,7 @@ public static function translate(
8384
string $apiKey,
8485
string $format,
8586
string $modelKey,
86-
string $text,
87+
string|array $text,
8788
// Optional arguments (alphabetical)
8889
?string $context = null,
8990
bool $dryRun = false,
@@ -123,13 +124,13 @@ public static function translate(
123124
$model['defaults']['context'] = $context;
124125
}
125126

126-
// Measure original length
127127
$originalLength = mb_strlen($text);
128128

129129
// Pre-validation (before filters)
130130
if ($validate && $format !== 'text') {
131131
switch ($format) {
132132
case 'json':
133+
case 'structured_json':
133134
$jsonValidator = new JsonValidator();
134135
$preResult = $jsonValidator->validate($text);
135136
if (! $preResult->isValid()) {
@@ -171,7 +172,7 @@ public static function translate(
171172
// Length guard: make sure prepared text fits model constraints
172173
$lengthValidator = TextLengthValidator::fromModelConfig($model);
173174
$lenResult = $lengthValidator->validate($prepared);
174-
if (! $lenResult->isValid()) {
175+
if (!$lenResult->isValid()) {
175176
throw new \RuntimeException(
176177
"Input length exceeds model limits: " . implode('; ', $lenResult->getErrors())
177178
);
@@ -256,7 +257,7 @@ public static function translate(
256257
);
257258
}
258259
try {
259-
$parsed = $driver->parseResponse($model, $raw);
260+
$parsed = $driver->parseResponse($model, $raw, $options);
260261
$translated = $parsed['text'];
261262
$rawUsage = $parsed['usage'] ?? null;
262263
} catch (\RuntimeException $e) {
@@ -279,6 +280,7 @@ public static function translate(
279280
if ($validate && $format !== 'text') {
280281
switch ($format) {
281282
case 'json':
283+
case 'structured_json':
282284
$postResult = (new JsonValidator())->validate($result);
283285
if (! $postResult->isValid()) {
284286
throw new \RuntimeException(
@@ -299,7 +301,6 @@ public static function translate(
299301
$valLogPost = "[JSON schema validated]\n";
300302
}
301303
break;
302-
303304
case 'html':
304305
$htmlValidator = new HtmlValidator();
305306
$postResult = $htmlValidator->validate($result);

src/Bblslug/Console/Cli.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ public static function run(): void
9999
);
100100
}
101101

102-
if (!in_array($format, ['text','html','json'], true)) {
103-
Help::error("Invalid format: '{$format}'. Allowed: text, html, json.");
102+
if (!in_array($format, ['text','html','json', 'structured_json'], true)) {
103+
Help::error("Invalid format: '{$format}'. Allowed: text, html, json, structured_json.");
104104
}
105105

106106
// Load API key

src/Bblslug/Models/Drivers/AnthropicDriver.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public function buildRequest(array $config, string $text, array $options): array
9393
*
9494
* @throws \RuntimeException If the response is malformed or markers are missing.
9595
*/
96-
public function parseResponse(array $config, string $responseBody): array
96+
public function parseResponse(array $config, string $responseBody, array $options): array
9797
{
9898
$data = json_decode($responseBody, true);
9999

src/Bblslug/Models/Drivers/DeepLDriver.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public function buildRequest(array $config, string $text, array $options): array
9696
*
9797
* @throws \RuntimeException If the response format is unexpected.
9898
*/
99-
public function parseResponse(array $config, string $responseBody): array
99+
public function parseResponse(array $config, string $responseBody, array $options): array
100100
{
101101
$data = json_decode($responseBody, true);
102102
if (

src/Bblslug/Models/Drivers/GoogleDriver.php

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,6 @@ public function buildRequest(array $config, string $text, array $options): array
6060
]
6161
);
6262

63-
// Wrap user text in markers
64-
$contentText = self::START . "\n" . $text . "\n" . self::END;
65-
6663
// Build JSON payload
6764
$generationConfig = array_filter([
6865
'temperature' => (float) $temperature,
@@ -79,6 +76,16 @@ public function buildRequest(array $config, string $text, array $options): array
7976
$generationConfig['thinkingConfig'] = $thinkingConfig;
8077
}
8178

79+
if (!empty($options['response_schema'])) {
80+
// @link https://ai.google.dev/gemini-api/docs/structured-output
81+
$generationConfig['response_schema'] = $this->adaptSchema($options['response_schema']);
82+
$generationConfig['response_mime_type'] = 'application/json';
83+
$contentText = $text;
84+
} else {
85+
// Wrap user text in markers
86+
$contentText = self::START . "\n" . $text . "\n" . self::END;
87+
}
88+
8289
$body = [
8390
'system_instruction' => ['parts' => [['text' => $systemText]]],
8491
'contents' => [['parts' => [['text' => $contentText]]]],
@@ -105,7 +112,7 @@ public function buildRequest(array $config, string $text, array $options): array
105112
*
106113
* @throws \RuntimeException If the response is malformed or markers are missing.
107114
*/
108-
public function parseResponse(array $config, string $responseBody): array
115+
public function parseResponse(array $config, string $responseBody, array $options): array
109116
{
110117
$data = json_decode($responseBody, true);
111118
if (!is_array($data)) {
@@ -142,14 +149,18 @@ public function parseResponse(array $config, string $responseBody): array
142149
}
143150
}
144151

145-
// Extract between markers
146-
$pattern = '/' . preg_quote(self::START, '/') . '(.*?)' . preg_quote(self::END, '/') . '/s';
152+
if (!empty($options['response_schema'])) {
153+
$text = $accumulated;
154+
} else {
155+
// Extract between markers
156+
$pattern = '/' . preg_quote(self::START, '/') . '(.*?)' . preg_quote(self::END, '/') . '/s';
147157

148-
if (!preg_match($pattern, $accumulated, $matches)) {
149-
throw new \RuntimeException("Markers not found in Gemini response");
150-
}
158+
if (!preg_match($pattern, $accumulated, $matches)) {
159+
throw new \RuntimeException("Markers not found in Gemini response");
160+
}
151161

152-
$text = trim($matches[1]);
162+
$text = trim($matches[1]);
163+
}
153164

154165
// Usage metadata
155166
$usage = $data['usageMetadata'] ?? null;
@@ -159,4 +170,19 @@ public function parseResponse(array $config, string $responseBody): array
159170
'usage' => $usage,
160171
];
161172
}
173+
174+
/**
 * Recursively strip the JSON Schema `additionalProperties` keyword from a schema.
 *
 * The result is what buildRequest() sends as `response_schema`.
 * NOTE(review): presumably the keyword is removed because Gemini's
 * response_schema subset does not accept it — confirm against
 * https://ai.google.dev/gemini-api/docs/structured-output
 *
 * Only the keyword is removed. Entries of a `properties` map are field names,
 * not schema keywords, so a field that is literally called
 * "additionalProperties" is kept and its own schema is still adapted.
 *
 * @param array<int|string, mixed> $schema Schema node (or list of schema nodes) to clean.
 *
 * @return array<int|string, mixed> Copy of the schema without `additionalProperties` keywords.
 */
protected function adaptSchema(array $schema): array
{
    // unset() on a missing key is a no-op, so no existence check is needed.
    unset($schema['additionalProperties']);

    foreach ($schema as $key => $value) {
        if (!\is_array($value)) {
            continue;
        }

        if ($key === 'properties') {
            // Keys at this level are user-defined field names: keep every one
            // of them and only descend into the schema attached to each field.
            foreach ($value as $fieldName => $fieldSchema) {
                if (\is_array($fieldSchema)) {
                    $value[$fieldName] = $this->adaptSchema($fieldSchema);
                }
            }
            $schema[$key] = $value;
            continue;
        }

        // Any other nested array (items, anyOf lists, ...) is treated as a schema node.
        $schema[$key] = $this->adaptSchema($value);
    }

    return $schema;
}
162188
}

src/Bblslug/Models/Drivers/OpenAiDriver.php

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,25 @@ public function buildRequest(array $config, string $text, array $options): array
5454
]
5555
);
5656

57+
$responseFormat = [];
58+
if (!empty($options['response_schema'])) {
59+
$content = $text;
60+
$responseFormat = [
61+
'type' => 'json_schema',
62+
'json_schema' => [
63+
'strict' => $options['strict'] ?? true,
64+
"name" => $options['response_schema_name'] ?? 'anonymous',
65+
"schema" => $options['response_schema'],
66+
],
67+
];
68+
} else {
69+
$content = self::START . "\n{$text}\n" . self::END;
70+
}
71+
5772
// Compose chat messages
5873
$messages = [
5974
['role' => 'system', 'content' => $systemPrompt],
60-
['role' => 'user', 'content' => self::START . "\n{$text}\n" . self::END],
75+
['role' => 'user', 'content' => $content],
6176
];
6277

6378
$payload = [
@@ -66,6 +81,10 @@ public function buildRequest(array $config, string $text, array $options): array
6681
'temperature' => (float) $temperature,
6782
];
6883

84+
if (count($responseFormat) > 0) {
85+
$payload['response_format'] = $responseFormat;
86+
}
87+
6988
return [
7089
'url' => $config['endpoint'],
7190
'headers' => $config['requirements']['headers'] ?? [],
@@ -86,7 +105,7 @@ public function buildRequest(array $config, string $text, array $options): array
86105
*
87106
* @throws \RuntimeException If the response is malformed or markers are missing.
88107
*/
89-
public function parseResponse(array $config, string $responseBody): array
108+
public function parseResponse(array $config, string $responseBody, array $options): array
90109
{
91110
$data = json_decode($responseBody, true);
92111
if (!is_array($data)) {
@@ -111,12 +130,16 @@ public function parseResponse(array $config, string $responseBody): array
111130
throw new \RuntimeException("OpenAI translation failed: {$responseBody}");
112131
}
113132

114-
// Extract between markers
115-
$pattern = '/' . preg_quote(self::START, '/') . '(.*?)' . preg_quote(self::END, '/') . '/s';
116-
if (!preg_match($pattern, $content, $matches)) {
117-
throw new \RuntimeException("Markers not found in OpenAI response");
133+
if (!empty($options['response_schema'])) {
134+
$text = $content;
135+
} else {
136+
// Extract between markers
137+
$pattern = '/' . preg_quote(self::START, '/') . '(.*?)' . preg_quote(self::END, '/') . '/s';
138+
if (!preg_match($pattern, $content, $matches)) {
139+
throw new \RuntimeException("Markers not found in OpenAI response");
140+
}
141+
$text = trim($matches[1]);
118142
}
119-
$text = trim($matches[1]);
120143

121144
// Usage statistics
122145
$usage = $data['usage'] ?? null;

src/Bblslug/Models/Drivers/XaiDriver.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public function buildRequest(array $config, string $text, array $options): array
8989
*
9090
* @throws \RuntimeException If the response is malformed or markers are missing.
9191
*/
92-
public function parseResponse(array $config, string $responseBody): array
92+
public function parseResponse(array $config, string $responseBody, array $options): array
9393
{
9494
$data = json_decode($responseBody, true);
9595

src/Bblslug/Models/Drivers/YandexDriver.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ public function buildRequest(array $config, string $text, array $options): array
100100
*
101101
* @throws \RuntimeException If the response is malformed or markers are missing.
102102
*/
103-
public function parseResponse(array $config, string $responseBody): array
103+
public function parseResponse(array $config, string $responseBody, array $options): array
104104
{
105105
$data = json_decode($responseBody, true);
106106

0 commit comments

Comments
 (0)