Skip to content

Commit ae21554

Browse files
committed
fix: dino prompting
1 parent 13900f6 commit ae21554

File tree

1 file changed

+52
-2
lines changed

1 file changed

+52
-2
lines changed

app/src/core/api-client.js

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ export class ApiClient {
7676
let rawText = '';
7777
try {
7878
let res;
79+
let attemptKind = 'single';
7980
if (endpointType === 'groundingdino') {
8081
// For GroundingDINO servers that expect multipart/form-data
8182
// Build FormData: file, prompt, thresholds. Include common synonyms to maximize compatibility.
@@ -101,6 +102,25 @@ export class ApiClient {
101102
// Remove JSON content-type so browser sets multipart boundary
102103
headers = this._sanitizeForMultipart(headers);
103104
res = await fetch(url, { method: 'POST', headers, body: fd, signal: controller.signal });
105+
// Peek and optionally retry with JSON if the server returned a generic fallback
106+
let contentType0 = res.headers.get('content-type') || '';
107+
let j0 = null;
108+
if (contentType0.includes('application/json')) {
109+
try { j0 = await res.clone().json(); } catch {}
110+
} else {
111+
try { await res.clone().text(); } catch {}
112+
}
113+
if (this._shouldRetryGroundingDino(j0, p)) {
114+
// Retry with JSON body including broader keys (may trigger preflight but improves compatibility)
115+
const jsonBody = buildRequestBody({ endpointType, baseURL, model, temperature, maxTokens, prompt, sysPrompt, imageB64: b64, reasoningEffort, dinoBoxThreshold, dinoTextThreshold });
116+
const jsonHeaders = { 'Content-Type': 'application/json' };
117+
const controller2 = new AbortController();
118+
const to2 = setTimeout(() => controller2.abort('timeout'), timeoutMs);
119+
const res2 = await fetch(url, { method: 'POST', headers: jsonHeaders, body: JSON.stringify(jsonBody), signal: controller2.signal });
120+
clearTimeout(to2);
121+
res = res2;
122+
attemptKind = 'retry-json';
123+
}
104124
} else {
105125
res = await fetch(url, { method: 'POST', headers, body: JSON.stringify(body), signal: controller.signal });
106126
}
@@ -177,7 +197,7 @@ export class ApiClient {
177197
url,
178198
headers: this._sanitizeHeaders(headers),
179199
bodyPreview: (endpointType === 'groundingdino')
180-
? 'multipart/form-data (file, prompt, thresholds)'
200+
? (attemptKind === 'retry-json' ? 'multipart (initial) -> retried JSON (image+prompt+thresholds)' : 'multipart/form-data (file, prompt, thresholds)')
181201
: truncate(JSON.stringify(body), 1200)
182202
};
183203
const log = {
@@ -195,7 +215,7 @@ export class ApiClient {
195215
url,
196216
headers: this._sanitizeHeaders(headers),
197217
bodyPreview: (endpointType === 'groundingdino')
198-
? 'multipart/form-data (file, prompt, thresholds)'
218+
? (attemptKind === 'retry-json' ? 'multipart (initial) -> retried JSON (image+prompt+thresholds)' : 'multipart/form-data (file, prompt, thresholds)')
199219
: truncate(JSON.stringify(body), 1200)
200220
};
201221
const log = {
@@ -303,6 +323,36 @@ export class ApiClient {
303323
return JSON.stringify(j);
304324
}
305325

326+
_shouldRetryGroundingDino(serverResponse, userPrompt) {
327+
try {
328+
const p = String(userPrompt || '').trim().toLowerCase();
329+
if (!p) return false;
330+
const mv = String(serverResponse?.model_version || '');
331+
if (/fallback/i.test(mv)) return true;
332+
// Label Studio-like fallback: value.text === 'object' and boxes have zero area
333+
if (Array.isArray(serverResponse?.results)) {
334+
let any = false;
335+
let allZero = true;
336+
let allObject = true;
337+
for (const group of serverResponse.results) {
338+
const arr = Array.isArray(group?.result) ? group.result : [];
339+
for (const item of arr) {
340+
if (item?.type !== 'rectanglelabels') continue;
341+
const v = item?.value || {};
342+
any = true;
343+
const w = Number(v.width || 0);
344+
const h = Number(v.height || 0);
345+
if (w > 0 && h > 0) allZero = false;
346+
const txt = String(v.text || '').trim().toLowerCase();
347+
if (txt !== 'object') allObject = false;
348+
}
349+
}
350+
if (any && (allZero || allObject)) return true;
351+
}
352+
} catch {}
353+
return false;
354+
}
355+
306356
_adaptGroundingDinoToJson(serverResponse, imageW, imageH) {
307357
// Adapt various possible server shapes to canonical detection JSON.
308358
// Specifically supports Label Studio-like structure returned by

0 commit comments

Comments
 (0)