@@ -11,8 +11,10 @@ export class ApiClient {
1111 const t0 = performance . now ( ) ;
1212 // Build a minimal payload appropriate for the endpoint type
1313 const body = ( model . endpointType === 'responses' )
14- ? { model : model . model , max_output_tokens : 1 , input : [ { role : 'user' , content : [ { type : 'text' , text : 'ping' } ] } ] }
15- : { model : model . model , max_tokens : 1 , messages : [ { role : 'user' , content : [ { type : 'text' , text : 'ping' } ] } ] } ;
14+ // Responses API expects input_* types
15+ ? { model : model . model , max_output_tokens : 1 , input : [ { role : 'user' , content : [ { type : 'input_text' , text : 'ping' } ] } ] }
16+ // Chat API can accept either string or array. Use simple string for ping.
17+ : { model : model . model , max_tokens : 1 , messages : [ { role : 'user' , content : 'ping' } ] } ;
1618 const res = await fetch ( url , {
1719 method : 'POST' ,
1820 headers : this . _headers ( model ) ,
@@ -39,16 +41,30 @@ export class ApiClient {
3941 max_tokens : ( endpointType === 'responses' ) ? undefined : maxTokens
4042 } ) ;
4143
44+ // Some providers (notably via OpenRouter) have subtle differences in multimodal payloads.
45+ // Normalize a few common variants for maximum compatibility.
46+ const isOpenRouter = /openrouter\.ai/i . test ( String ( baseURL || '' ) ) ;
47+ const modelSlug = String ( model || '' ) . toLowerCase ( ) ;
48+ const isQwenVL = /qwen/ . test ( modelSlug ) && /vl/ . test ( modelSlug ) ;
49+
50+ // For Chat API (OpenAI-style), image_url can be either object {url} or string for some providers.
51+ const imagePartChat = ( isOpenRouter && isQwenVL )
52+ ? { type : 'image_url' , image_url : b64 }
53+ : { type : 'image_url' , image_url : { url : b64 } } ;
54+
55+ // For Responses API (new OpenAI Responses), types should be input_text / input_image
56+ // and image_url is commonly a direct string.
57+ const textPartResponses = { type : 'input_text' , text : prompt } ;
58+ const sysTextPartResponses = { type : 'input_text' , text : sysPrompt } ;
59+ const imagePartResponses = { type : 'input_image' , image_url : b64 } ;
60+
4261 let body ;
4362 if ( endpointType === 'responses' ) {
4463 body = {
4564 model, temperature, max_output_tokens : maxTokens ,
4665 input : [
47- { role :'system' , content :[ { type :'text' , text : sysPrompt } ] } ,
48- { role :'user' , content :[
49- { type :'text' , text : prompt } ,
50- { type :'image_url' , image_url : { url : b64 } }
51- ] }
66+ { role :'system' , content :[ sysTextPartResponses ] } ,
67+ { role :'user' , content :[ textPartResponses , imagePartResponses ] }
5268 ] ,
5369 response_format : { type :'json_object' }
5470 } ;
@@ -57,10 +73,13 @@ export class ApiClient {
5773 body = {
5874 model, temperature, max_tokens : maxTokens ,
5975 messages : [
60- { role :'system' , content :[ { type :'text' , text : sysPrompt } ] } ,
76+ // Some providers expect system as a plain string; use string for Qwen via OpenRouter.
77+ isOpenRouter && isQwenVL
78+ ? { role :'system' , content : sysPrompt }
79+ : { role :'system' , content :[ { type :'text' , text : sysPrompt } ] } ,
6180 { role :'user' , content :[
6281 { type :'text' , text : prompt } ,
63- { type : 'image_url' , image_url : { url : b64 } }
82+ imagePartChat
6483 ] }
6584 ] ,
6685 response_format : { type :'json_object' }
0 commit comments