Skip to content

Commit 4d213da

Browse files
committed
fix: enhance payload structure for API compatibility and clarify input types
1 parent 7e7eb65 commit 4d213da

File tree

1 file changed

+28
-9
lines changed

1 file changed

+28
-9
lines changed

app/src/core/api-client.js

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ export class ApiClient {
1111
const t0 = performance.now();
1212
// Build a minimal payload appropriate for the endpoint type
1313
const body = (model.endpointType === 'responses')
14-
? { model: model.model, max_output_tokens: 1, input: [{ role: 'user', content: [{ type: 'text', text: 'ping' }] }] }
15-
: { model: model.model, max_tokens: 1, messages: [{ role: 'user', content: [{ type: 'text', text: 'ping' }] }] };
14+
// Responses API expects input_* types
15+
? { model: model.model, max_output_tokens: 1, input: [{ role: 'user', content: [{ type: 'input_text', text: 'ping' }] }] }
16+
// The Chat API accepts message content as either a string or an array of parts; a plain string is enough for the ping.
17+
: { model: model.model, max_tokens: 1, messages: [{ role: 'user', content: 'ping' }] };
1618
const res = await fetch(url, {
1719
method: 'POST',
1820
headers: this._headers(model),
@@ -39,16 +41,30 @@ export class ApiClient {
3941
max_tokens: (endpointType === 'responses') ? undefined : maxTokens
4042
});
4143

44+
// Some providers (notably via OpenRouter) have subtle differences in multimodal payloads.
45+
// Normalize a few common variants for maximum compatibility.
46+
const isOpenRouter = /openrouter\.ai/i.test(String(baseURL || ''));
47+
const modelSlug = String(model || '').toLowerCase();
48+
const isQwenVL = /qwen/.test(modelSlug) && /vl/.test(modelSlug);
49+
50+
// In the Chat API (OpenAI-style), image_url is normally an object { url }; some providers expect a plain string instead.
51+
const imagePartChat = (isOpenRouter && isQwenVL)
52+
? { type: 'image_url', image_url: b64 }
53+
: { type: 'image_url', image_url: { url: b64 } };
54+
55+
// For Responses API (new OpenAI Responses), types should be input_text / input_image
56+
// and image_url is passed as a plain string rather than a { url } object.
57+
const textPartResponses = { type: 'input_text', text: prompt };
58+
const sysTextPartResponses = { type: 'input_text', text: sysPrompt };
59+
const imagePartResponses = { type: 'input_image', image_url: b64 };
60+
4261
let body;
4362
if (endpointType === 'responses') {
4463
body = {
4564
model, temperature, max_output_tokens: maxTokens,
4665
input: [
47-
{ role:'system', content:[{ type:'text', text: sysPrompt }]},
48-
{ role:'user', content:[
49-
{ type:'text', text: prompt },
50-
{ type:'image_url', image_url: { url: b64 } }
51-
]}
66+
{ role:'system', content:[ sysTextPartResponses ]},
67+
{ role:'user', content:[ textPartResponses, imagePartResponses ]}
5268
],
5369
response_format: { type:'json_object' }
5470
};
@@ -57,10 +73,13 @@ export class ApiClient {
5773
body = {
5874
model, temperature, max_tokens: maxTokens,
5975
messages: [
60-
{ role:'system', content:[{ type:'text', text: sysPrompt }]},
76+
// Some providers expect the system message content as a plain string; use a string for Qwen-VL models via OpenRouter.
77+
isOpenRouter && isQwenVL
78+
? { role:'system', content: sysPrompt }
79+
: { role:'system', content:[{ type:'text', text: sysPrompt }]},
6180
{ role:'user', content:[
6281
{ type:'text', text: prompt },
63-
{ type:'image_url', image_url: { url: b64 } }
82+
imagePartChat
6483
]}
6584
],
6685
response_format: { type:'json_object' }

0 commit comments

Comments
 (0)