Skip to content

Commit 02f01c2

Browse files
committed
feat: model api calls refactoring
1 parent a8d1113 commit 02f01c2

File tree

5 files changed

+133
-68
lines changed

5 files changed

+133
-68
lines changed

app/src/components/model-tabs.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export class ModelTabs {
4040
// Swatch acts as an enable/disable switch
4141
const swatch = document.createElement('span');
4242
swatch.className = 'swatch';
43+
swatch.dataset.modelId = cfg.id;
4344
swatch.setAttribute('role', 'switch');
4445
swatch.setAttribute('aria-checked', String(cfg.enabled));
4546
swatch.title = cfg.enabled ? 'Enabled — click to disable' : 'Disabled — click to enable';
@@ -420,4 +421,12 @@ export class ModelTabs {
420421

421422
return card;
422423
}
424+
425+
// Toggle blinking state of a model tab's color chip while a call is in progress
426+
setModelRunning(modelId, running) {
427+
const btn = this.header.querySelector(`.tab-btn[data-model-id="${modelId}"]`);
428+
const sw = btn?.querySelector('.swatch');
429+
if (!sw) return;
430+
sw.classList.toggle('running', !!running);
431+
}
423432
}

app/src/core/api-client.js

Lines changed: 23 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { blobToDataURL, truncate } from './utils.js';
2+
import { buildRequestBody } from './providers/builder.js';
23

34
/**
45
* ApiClient
@@ -41,54 +42,18 @@ export class ApiClient {
4142
max_tokens: (endpointType === 'responses') ? undefined : maxTokens
4243
});
4344

44-
// Some providers (notably via OpenRouter) have subtle differences in multimodal payloads.
45-
// Normalize a few common variants for maximum compatibility.
46-
const isOpenRouter = /openrouter\.ai/i.test(String(baseURL || ''));
47-
const modelSlug = String(model || '').toLowerCase();
48-
const isQwenVL = /qwen/.test(modelSlug) && /vl/.test(modelSlug);
49-
50-
// For Chat API (OpenAI-style), image_url can be either object {url} or string for some providers.
51-
const imagePartChat = (isOpenRouter && isQwenVL)
52-
? { type: 'image_url', image_url: b64 }
53-
: { type: 'image_url', image_url: { url: b64 } };
54-
55-
// For Responses API (new OpenAI Responses), types should be input_text / input_image
56-
// and image_url is commonly a direct string.
57-
const textPartResponses = { type: 'input_text', text: prompt };
58-
const sysTextPartResponses = { type: 'input_text', text: sysPrompt };
59-
const imagePartResponses = { type: 'input_image', image_url: b64 };
60-
61-
let body;
62-
if (endpointType === 'responses') {
63-
body = {
64-
model,
65-
input: [
66-
{ role:'system', content:[ sysTextPartResponses ]},
67-
{ role:'user', content:[ textPartResponses, imagePartResponses ]}
68-
],
69-
// JSON-only response formatting
70-
text: { format: { type: 'json_object' } },
71-
// Azure GPT-5 compatible: use top-level max_output_tokens; omit temperature entirely
72-
max_output_tokens: maxTokens,
73-
...(reasoningEffort ? { reasoning: { effort: reasoningEffort } } : {})
74-
};
75-
} else {
76-
// chat
77-
body = {
78-
model, temperature, max_tokens: maxTokens,
79-
messages: [
80-
// Some providers expect system as a plain string; use string for Qwen via OpenRouter.
81-
isOpenRouter && isQwenVL
82-
? { role:'system', content: sysPrompt }
83-
: { role:'system', content:[{ type:'text', text: sysPrompt }]},
84-
{ role:'user', content:[
85-
{ type:'text', text: prompt },
86-
imagePartChat
87-
]}
88-
],
89-
response_format: { type:'json_object' }
90-
};
91-
}
45+
// Build provider/mode-specific body using the new builder
46+
const body = buildRequestBody({
47+
endpointType,
48+
baseURL,
49+
model,
50+
temperature,
51+
maxTokens,
52+
prompt,
53+
sysPrompt,
54+
imageB64: b64,
55+
reasoningEffort
56+
});
9257

9358
const controller = new AbortController();
9459
const to = setTimeout(() => controller.abort('timeout'), timeoutMs);
@@ -115,16 +80,17 @@ export class ApiClient {
11580
// Auto-retry for Responses when stopped by max_output_tokens
11681
if (endpointType === 'responses' && j && j.status === 'incomplete' && j.incomplete_details?.reason === 'max_output_tokens') {
11782
const increased = Math.min(Math.max(Number(maxTokens) || 300, 300) * 2, 4096);
118-
const retryBody = {
83+
const retryBody = buildRequestBody({
84+
endpointType,
85+
baseURL,
11986
model,
120-
input: [
121-
{ role:'system', content:[ sysTextPartResponses ]},
122-
{ role:'user', content:[ textPartResponses, imagePartResponses ]}
123-
],
124-
text: { format: { type: 'json_object' } },
125-
max_output_tokens: increased,
126-
...(reasoningEffort ? { reasoning: { effort: reasoningEffort } } : {})
127-
};
87+
temperature,
88+
maxTokens: increased,
89+
prompt,
90+
sysPrompt,
91+
imageB64: b64,
92+
reasoningEffort
93+
});
12894
const controller2 = new AbortController();
12995
const to2 = setTimeout(() => controller2.abort('timeout'), timeoutMs);
13096
res = await fetch(url, { method:'POST', headers, body: JSON.stringify(retryBody), signal: controller2.signal });

app/src/core/batch-runner.js

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,28 @@ export class BatchRunner {
4040
// Notify UI that a new run started so it can show a partial row immediately
4141
onRunStart?.({ batchId: batchMeta.id, runId: runMeta.id, runMeta });
4242

43+
// Set image on overlay immediately so progressive detections draw on it
44+
const ctxImage = await createImageBitmap(imageBlob, { imageOrientation: 'from-image' });
45+
this.overlay.setImage(ctxImage, imageW, imageH, imageName);
46+
4347
// Kick off parallel calls
4448
const sysTpl = this.storage?.getSystemPromptTemplate?.() || '';
49+
const partialResults = [];
50+
const updateUI = async () => {
51+
try {
52+
// Draw whatever we have so far
53+
const items = partialResults.filter(r => r.status === 'ok' && r.parsed?.primary).map(r => ({
54+
color: r.color, model: r.modelDisplayName, det: r.parsed.primary
55+
}));
56+
this.overlay.drawDetections(items);
57+
// Show partial results in the table without waiting for all
58+
this.resultsTable.showRun(runMeta, { id: runMeta.id, results: partialResults, logs: {} });
59+
} catch { /* noop */ }
60+
};
61+
4562
const promises = enabledModels.map(async m => {
63+
// mark model as running (blink tab chip)
64+
this.modelTabs?.setModelRunning?.(m.id, true);
4665
let status = 'ok', latencyMs = null, rawText = '', rawFull = undefined, parsed = null, errorMessage = undefined;
4766
const onLog = (log) => this._appendLog(runMeta.id, m.id, log);
4867
try {
@@ -69,20 +88,14 @@ export class BatchRunner {
6988
errorMessage
7089
};
7190
await this._appendResult(runMeta.id, result);
91+
partialResults.push(result);
92+
await updateUI();
93+
// clear running state for this model
94+
this.modelTabs?.setModelRunning?.(m.id, false);
7295
return result;
7396
});
74-
7597
const settled = await Promise.all(promises);
7698

77-
// Update overlay + results table
78-
const items = settled.filter(r => r.status === 'ok' && r.parsed?.primary).map(r => ({
79-
color: r.color, model: r.modelDisplayName, det: r.parsed.primary
80-
}));
81-
// Update canvas
82-
const ctxImage = await createImageBitmap(imageBlob, { imageOrientation: 'from-image' });
83-
this.overlay.setImage(ctxImage, imageW, imageH, imageName);
84-
this.overlay.drawDetections(items);
85-
8699
// Update summaries
87100
const okCount = settled.filter(r => r.status === 'ok').length;
88101
const errCount = settled.length - okCount;
@@ -100,7 +113,7 @@ export class BatchRunner {
100113
batchMeta.summary.avgLatencyMs = avgLatency != null ? (prevAvg == null ? avgLatency : Math.round((prevAvg + avgLatency)/2)) : prevAvg;
101114
await this.history.updateBatchMeta(batchMeta);
102115

103-
// Results panel reflects the current run
116+
// Final refresh of the results table using stored run data
104117
const runData = await this.history.getRunData(runMeta.id);
105118
this.resultsTable.showRun(runMeta, runData);
106119

app/src/core/providers/builder.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Provider/mode-specific request builder for model calls.
2+
// Keeps ApiClient slim by encapsulating payload differences.
3+
4+
/**
 * Classify an API base URL by provider.
 * Prefers an exact host match (consistent for both OpenRouter and Azure) and
 * falls back to the legacy substring heuristic when the URL is not absolute,
 * so scheme-less base URLs like "openrouter.ai/api/v1" still classify.
 * @param {string} baseURL
 * @returns {'openrouter'|'azure'|'generic'}
 */
function detectProvider(baseURL = '') {
  const raw = String(baseURL || '').toLowerCase();
  let host = '';
  try {
    host = new URL(raw).host;
  } catch { /* not an absolute URL; substring fallback below */ }
  if (host === 'openrouter.ai' || host.endsWith('.openrouter.ai')) return 'openrouter';
  if (host.endsWith('.azure.com')) return 'azure';
  // Legacy behavior for non-parseable base URLs.
  if (!host && raw.includes('openrouter.ai')) return 'openrouter';
  return 'generic';
}
13+
14+
// True when the model slug looks like a Qwen vision-language ("VL") model.
function isQwenVLModel(model = '') {
  const slug = String(model || '').toLowerCase();
  return slug.includes('qwen') && slug.includes('vl');
}
18+
19+
/**
 * Build an OpenAI-style Chat Completions request body.
 * OpenRouter + Qwen-VL deviates from the standard schema in two ways:
 * image_url is a bare string, and system content is a plain string.
 * @param {object} ctx - model, temperature, maxTokens, prompt, sysPrompt, baseURL, imageB64
 * @returns {object} chat payload with JSON-object response formatting
 */
function buildChatPayload(ctx) {
  const { model, temperature = 0, maxTokens = 2048, prompt, sysPrompt, baseURL, imageB64 } = ctx;
  // Compute the provider quirk once and reuse it for both schema deviations.
  const qwenViaOpenRouter = detectProvider(baseURL) === 'openrouter' && isQwenVLModel(model);

  const imagePart = qwenViaOpenRouter
    ? { type: 'image_url', image_url: imageB64 }
    : { type: 'image_url', image_url: { url: imageB64 } };

  const systemMessage = qwenViaOpenRouter
    ? { role: 'system', content: sysPrompt }
    : { role: 'system', content: [{ type: 'text', text: sysPrompt }] };

  return {
    model,
    temperature,
    max_tokens: maxTokens,
    messages: [
      systemMessage,
      { role: 'user', content: [{ type: 'text', text: prompt }, imagePart] }
    ],
    response_format: { type: 'json_object' }
  };
}
45+
46+
/**
 * Build an OpenAI Responses-API request body (input_text / input_image parts).
 * @param {object} ctx - model, maxTokens, prompt, sysPrompt, imageB64, reasoningEffort
 * @returns {object} Responses payload with JSON-object text formatting
 */
function buildResponsesPayload(ctx) {
  const { model, maxTokens = 2048, prompt, sysPrompt, reasoningEffort, imageB64 } = ctx;
  const systemMessage = { role: 'system', content: [{ type: 'input_text', text: sysPrompt }] };
  const userMessage = {
    role: 'user',
    content: [
      { type: 'input_text', text: prompt },
      { type: 'input_image', image_url: imageB64 }
    ]
  };
  const payload = {
    model,
    input: [systemMessage, userMessage],
    // JSON-only response formatting
    text: { format: { type: 'json_object' } },
    // Azure GPT-5 compatible: top-level max_output_tokens; omit temperature entirely
    max_output_tokens: maxTokens
  };
  if (reasoningEffort) payload.reasoning = { effort: reasoningEffort };
  return payload;
}
61+
62+
/**
 * Build the provider/mode-specific request body for a model call.
 * Dispatches to the Responses-API builder or the Chat-Completions builder
 * based on endpointType; all other options pass through to the builder.
 * @param {object} opts - endpointType, baseURL, model, temperature, maxTokens,
 *   prompt, sysPrompt, imageB64, reasoningEffort
 * @returns {object} JSON-serializable request payload
 */
export function buildRequestBody(opts) {
  const { endpointType, baseURL, model, temperature, maxTokens, prompt, sysPrompt, imageB64, reasoningEffort } = opts;
  const ctx = { endpointType, baseURL, model, temperature, maxTokens, prompt, sysPrompt, imageB64, reasoningEffort };
  return endpointType === 'responses' ? buildResponsesPayload(ctx) : buildChatPayload(ctx);
}
67+
68+
/**
 * Public wrapper so other modules can reuse the internal provider heuristic.
 * @param {string} baseURL
 * @returns {'openrouter'|'azure'|'generic'}
 */
export function detectProviderKind(baseURL) {
  const kind = detectProvider(baseURL);
  return kind;
}
71+

app/styles.css

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,12 @@ h2 { font-size:16px; margin:0; }
131131
box-shadow: 0 0 0 1px var(--accent) inset;
132132
}
133133
.tab-btn .swatch { width:12px; height:12px; border-radius:3px; border:2px solid #000; display:inline-block; margin-right:6px; vertical-align:middle; cursor:pointer; }
134+
/* Pulse a model chip while its API call is in flight. */
.tab-btn .swatch.running { animation: chipBlink 1.1s ease-in-out infinite; box-shadow: 0 0 0 2px rgba(255,255,255,0.08) inset; }
@keyframes chipBlink {
  /* Endpoints share the dimmed state; midpoint brightens for the blink. */
  0%, 100% { filter: brightness(0.8) saturate(0.9); opacity: 0.6; }
  50% { filter: brightness(1.3) saturate(1.1); opacity: 1; }
}
134140
.tabs-body { padding: 10px; }
135141
.tab-pane { display:none; }
136142
.tab-pane.active { display:block; }

0 commit comments

Comments
 (0)