Skip to content

Commit f9595ae

Browse files
committed
feat: add grounding dino support
1 parent 02f01c2 commit f9595ae

File tree

7 files changed

+283
-34
lines changed

7 files changed

+283
-34
lines changed

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,43 @@ npx http-server app -p 8080
3838
}
3939
```
4040

41+
### GroundingDINO Support (Optional)
42+
43+
Besides LLM providers, the app can call a GroundingDINO server (local or remote) for phrase‑grounded object detection.
44+
45+
- Add a model and choose Endpoint Type: `GroundingDINO`.
46+
- Set Base URL to your detection endpoint (example server below serves `/groundingdino/detect`).
47+
- Adjust `Box thr` and `Text thr` as desired.
48+
49+
An example server is included in `server/`:
50+
51+
1) Create venv and install requirements, then install GroundingDINO from source.
52+
2) Set `GROUNDING_DINO_CONFIG_PATH` and `GROUNDING_DINO_WEIGHTS_PATH` env vars.
53+
3) Run: `uvicorn server.groundingdino_api:app --port 8001`
54+
4) In the app, set Base URL to `http://localhost:8001/groundingdino/detect` and test.
55+
56+
The SPA adapts common GroundingDINO server responses into its canonical JSON for overlay rendering and CSV exports. It supports:
57+
- Pixel-space detections: `{ width, height, detections: [{ x,y,width,height,confidence }] }`
58+
- Label Studio–style results, such as those returned by the remote service described below: `{ results: [{ result: [{ type: 'rectanglelabels', value: { x,y,width,height,score } }], score }] }` with normalized [0..1] coordinates (scaled to pixels using the input image size).
59+
60+
Remote example (already running):
61+
- Endpoint: `https://dino.d2.wopee.io/predict`
62+
- Configure a model with:
63+
- Endpoint Type: `GroundingDINO`
64+
- Base URL: `https://dino.d2.wopee.io/predict`
65+
- Model (label): e.g. `GroundingDINO`
66+
- Box thr: `0.35` • Text thr: `0.25`
67+
68+
The client uploads the image as multipart/form-data with the fields `file`, `prompt`, `box_threshold`, and `text_threshold`, equivalent to the following curl command:
69+
70+
```
71+
curl -i -X POST "https://dino.d2.wopee.io/predict" \
72+
-F "file=@Downloads/screenshot.png" \
73+
-F "prompt=button" \
74+
-F "box_threshold=0.35" \
75+
-F "text_threshold=0.25"
76+
```
77+
4178
## Browser Support
4279

4380
- Designed for evergreen browsers (ES modules). Uses `createImageBitmap({ imageOrientation: 'from-image' })` to respect EXIF rotation when supported, with a safe fallback.

app/index.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ <h1>LLM Visual Evaluator</h1>
1313
</header>
1414

1515
<main class="layout" role="main">
16+
<div id="corsWarning" style="display:none; margin:8px 8px 0 8px; padding:8px; border:1px solid #e9b34f; background:#fff8e6; color:#8a6100; border-radius:6px;">
17+
Running from file:// has origin "null" and most APIs block it. Please serve the app over http(s), e.g. <code>python3 -m http.server -d app 8080</code>, then open <code>http://localhost:8080</code>.
18+
</div>
1619
<!-- Inputs Panel (Left) -->
1720
<section class="panel inputs" aria-label="Inputs panel">
1821
<div class="section-block">

app/src/components/model-tabs.js

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,19 +173,19 @@ export class ModelTabs {
173173
<input data-field="color" type="text" value="${cfg.color}"/>
174174
</div>
175175
<div>
176-
<label>Model ID</label>
176+
<label data-el="modelLabel">Model ID</label>
177177
<input data-field="model" type="text" value="${cfg.model}" placeholder="gpt-4o-mini"/>
178178
</div>
179179
<div class="full">
180180
<label>Base URL</label>
181181
<input data-field="baseURL" type="text" value="${cfg.baseURL}" placeholder="https://api.example.com/v1"/>
182182
</div>
183183
<div>
184-
<label>API Key</label>
184+
<label data-el="apiKeyLabel">API Key</label>
185185
<input data-field="apiKey" type="password" value="${cfg.apiKey || ''}" placeholder="sk-..."/>
186186
</div>
187187
<div>
188-
<label>API Version</label>
188+
<label data-el="apiVersionLabel">API Version</label>
189189
<input data-field="apiVersion" type="text" value="${cfg.apiVersion || ''}" placeholder="2024-08-01-preview"/>
190190
</div>
191191
</div>
@@ -195,7 +195,7 @@ export class ModelTabs {
195195
<div class="model-section">
196196
<div class="model-grid wide">
197197
<div>
198-
<label>Max tokens</label>
198+
<label data-el="maxTokensLabel">Max tokens</label>
199199
<input data-field="maxTokens" type="number" value="${cfg.maxTokens ?? 2048}"/>
200200
</div>
201201
<div>
@@ -211,6 +211,7 @@ export class ModelTabs {
211211
<div class="btn-group" role="group" aria-label="Endpoint type">
212212
<button type="button" class="btn endpoint-btn ${cfg.endpointType==='chat' ? 'active':''}" data-endpoint="chat" aria-pressed="${cfg.endpointType==='chat'}">Chat</button>
213213
<button type="button" class="btn endpoint-btn ${cfg.endpointType==='responses' ? 'active':''}" data-endpoint="responses" aria-pressed="${cfg.endpointType==='responses'}">Responses</button>
214+
<button type="button" class="btn endpoint-btn ${cfg.endpointType==='groundingdino' ? 'active':''}" data-endpoint="groundingdino" aria-pressed="${cfg.endpointType==='groundingdino'}">GroundingDINO</button>
214215
</div>
215216
<div class="inline-field endpoint-field" data-endpoint="chat" style="display:${cfg.endpointType==='chat' ? 'flex':'none'}">
216217
<label>Temperature</label>
@@ -225,6 +226,12 @@ export class ModelTabs {
225226
<option value="high" ${cfg.reasoningEffort==='high'?'selected':''}>high</option>
226227
</select>
227228
</div>
229+
<div class="inline-field endpoint-field" data-endpoint="groundingdino" style="display:${cfg.endpointType==='groundingdino' ? 'flex':'none'}; gap:8px;">
230+
<label>Box thr</label>
231+
<input data-field="dinoBoxThreshold" type="number" step="0.01" min="0" max="1" value="${cfg.dinoBoxThreshold ?? 0.35}" style="width:90px"/>
232+
<label>Text thr</label>
233+
<input data-field="dinoTextThreshold" type="number" step="0.01" min="0" max="1" value="${cfg.dinoTextThreshold ?? 0.25}" style="width:90px"/>
234+
</div>
228235
</div>
229236
</div>
230237
</div>
@@ -246,12 +253,18 @@ export class ModelTabs {
246253
let currentEndpointType = cfg.endpointType || 'chat';
247254
const baseURL = card.querySelector('input[data-field="baseURL"]');
248255
const model = card.querySelector('input[data-field="model"]');
256+
const modelLabelEl = card.querySelector('[data-el="modelLabel"]');
249257
const apiVersion = card.querySelector('input[data-field="apiVersion"]');
258+
const apiVersionLabelEl = card.querySelector('[data-el="apiVersionLabel"]');
250259
const reasoningEffort = card.querySelector('select[data-field="reasoningEffort"]');
251260
const key = card.querySelector('input[data-field="apiKey"]');
261+
const apiKeyLabelEl = card.querySelector('[data-el="apiKeyLabel"]');
252262
const temp = card.querySelector('input[data-field="temperature"]');
253263
const maxTok = card.querySelector('input[data-field="maxTokens"]');
264+
const maxTokensLabelEl = card.querySelector('[data-el="maxTokensLabel"]');
254265
const timeout = card.querySelector('input[data-field="timeoutMs"]');
266+
const dinoBoxThreshold = card.querySelector('input[data-field="dinoBoxThreshold"]');
267+
const dinoTextThreshold = card.querySelector('input[data-field="dinoTextThreshold"]');
255268

256269
const headersTa = card.querySelector('textarea[data-field="extraHeaders"]');
257270
const logEl = card.querySelector('[data-log]');
@@ -311,6 +324,38 @@ export class ModelTabs {
311324
color.addEventListener('focus', showColorPopover);
312325
color.addEventListener('blur', () => { /* keep open for interactions; closed by outside click */ });
313326

327+
/**
 * Sync the model card's form controls with the currently selected endpoint type.
 *
 * Reads `currentEndpointType` from the enclosing scope. When it is
 * 'groundingdino', the model, API version, max tokens, API key and extra-headers
 * controls are disabled and their labels/placeholders are switched to
 * "not used" / "n/a" wording; for any other endpoint type the regular
 * labels/placeholders are restored and the controls are enabled again.
 * Every element is null-checked, so missing controls are silently ignored.
 */
const updateEndpointNonApplicableUI = () => {
328+
const isDino = currentEndpointType === 'groundingdino';
329+
if (model) {
330+
model.disabled = isDino;
331+
model.placeholder = isDino ? '(not used for DINO)' : 'gpt-4o-mini';
332+
if (isDino) {
333+
const v = String(model.value || '');
334+
// Replace the value only when it is empty or contains one of the listed
// substrings (gpt, claude, qwen, llava, mini, vision); any other name
// already in the field is kept.
// NOTE(review): nothing here restores the previous value when switching
// back to a non-DINO endpoint — confirm that is intended.
if (!v || /(gpt|claude|qwen|llava|mini|vision)/i.test(v)) {
335+
model.value = 'GroundingDINO';
336+
}
337+
}
338+
}
339+
if (modelLabelEl) {
340+
modelLabelEl.textContent = isDino ? 'Display name (UI only)' : 'Model ID';
341+
}
342+
// The remaining fields are only toggled (disabled + relabelled); their values are left as-is.
if (apiVersion) apiVersion.disabled = isDino;
343+
if (apiVersionLabelEl) apiVersionLabelEl.textContent = isDino ? 'API Version (n/a)' : 'API Version';
344+
if (maxTok) maxTok.disabled = isDino;
345+
if (maxTokensLabelEl) maxTokensLabelEl.textContent = isDino ? 'Max tokens (n/a)' : 'Max tokens';
346+
if (key) {
347+
key.disabled = isDino;
348+
key.placeholder = isDino ? '(not used for DINO)' : 'sk-...';
349+
}
350+
if (apiKeyLabelEl) apiKeyLabelEl.textContent = isDino ? 'API Key (n/a)' : 'API Key';
351+
if (headersTa) {
352+
headersTa.disabled = isDino;
353+
headersTa.placeholder = isDino ? '(not used for DINO)' : '{"X-Org":"..."}';
354+
}
355+
};
356+
// Initialize once
357+
updateEndpointNonApplicableUI();
358+
314359
const persist = () => {
315360
let extra = undefined;
316361
try {
@@ -333,6 +378,8 @@ export class ModelTabs {
333378
maxTokens: Number(maxTok.value),
334379
timeoutMs: Number(timeout.value),
335380
extraHeaders: extra,
381+
dinoBoxThreshold: dinoBoxThreshold ? Number(dinoBoxThreshold.value) : undefined,
382+
dinoTextThreshold: dinoTextThreshold ? Number(dinoTextThreshold.value) : undefined,
336383
};
337384
this.storage.updateModel(updated);
338385

@@ -406,6 +453,8 @@ export class ModelTabs {
406453
endpointPanels.forEach(p => {
407454
p.style.display = (p.dataset.endpoint === currentEndpointType) ? 'flex' : 'none';
408455
});
456+
// Update disabled/labels for non-applicable fields
457+
updateEndpointNonApplicableUI();
409458
persist();
410459
});
411460
});
@@ -418,6 +467,8 @@ export class ModelTabs {
418467
timeout.addEventListener('input', persist);
419468
headersTa.addEventListener('input', persist);
420469
reasoningEffort.addEventListener('change', persist);
470+
if (dinoBoxThreshold) dinoBoxThreshold.addEventListener('input', persist);
471+
if (dinoTextThreshold) dinoTextThreshold.addEventListener('input', persist);
421472

422473
return card;
423474
}

0 commit comments

Comments
 (0)