Skip to content

Commit 36d7313

Browse files
claudesimonw
authored and committed
Add support for rendering points with labels in gemini-bbox tool
- Add extractPoints() function to parse JSON responses containing point data
- Add displayImageWithPoints() function to visualize points on canvas
- Points are rendered as colored circles with labels next to them
- Supports response format: [{"point": [y, x], "label": "name"}, ...]
- Falls back to bounding box rendering if no points are found
1 parent 7c1f572 commit 36d7313

File tree

1 file changed

+159
-1
lines changed

1 file changed

+159
-1
lines changed

gemini-bbox.html

Lines changed: 159 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,11 @@
105105
if (fileInput.files[0]) {
106106
// Extract coordinates from the response
107107
const coordinates = extractCoordinates(text);
108-
if (coordinates.length > 0) {
108+
const points = extractPoints(text);
109+
110+
if (points.length > 0) {
111+
displayImageWithPoints(fileInput.files[0], points);
112+
} else if (coordinates.length > 0) {
109113
displayImageWithBoundingBoxes(fileInput.files[0], coordinates);
110114
}
111115
}
@@ -120,6 +124,23 @@
120124
return matches.map(JSON.parse);
121125
}
122126

127+
/**
 * Extract labelled points from a model response.
 *
 * The response is expected to contain, somewhere in its text, a JSON array of
 * objects shaped like `{"point": [y, x], "label": "name"}`. The array may be
 * pretty-printed, wrapped in a Markdown code fence, or preceded by prose that
 * itself contains square brackets.
 *
 * @param {string} text - Raw response text to search.
 * @returns {Array<{point: number[], label?: string}>} The entries of the first
 *   JSON array in `text` that contains at least one valid point. Entries whose
 *   `point` is not an array of exactly two finite numbers are dropped. Returns
 *   an empty array when no such array is found.
 */
function extractPoints(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return [];
  }

  // A plottable entry has a `point` made of exactly two finite numbers.
  const isPointItem = (item) =>
    item !== null &&
    typeof item === 'object' &&
    Array.isArray(item.point) &&
    item.point.length === 2 &&
    item.point.every((coord) => typeof coord === 'number' && Number.isFinite(coord));

  // Return the index of the `]` that closes the `[` at `start`, or -1.
  // Brackets inside JSON string literals (e.g. a label like "a ] b") are
  // ignored so they cannot end the array early.
  const findArrayEnd = (start) => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i += 1) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
      } else if (ch === '"') {
        inString = true;
      } else if (ch === '[') {
        depth += 1;
      } else if (ch === ']') {
        depth -= 1;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  };

  // Try every `[` as a possible array start: earlier brackets in prose
  // (such as "[1]") simply fail to yield points and are skipped.
  for (let start = text.indexOf('['); start !== -1; start = text.indexOf('[', start + 1)) {
    const end = findArrayEnd(start);
    if (end === -1) {
      continue;
    }

    let parsed;
    try {
      parsed = JSON.parse(text.slice(start, end + 1));
    } catch (err) {
      // This bracketed span is not JSON; keep looking further along.
      continue;
    }

    if (!Array.isArray(parsed)) {
      continue;
    }
    const points = parsed.filter(isPointItem);
    if (points.length > 0) {
      return points;
    }
  }

  return [];
}
143+
123144
function displayImageWithBoundingBoxes(file, coordinates) {
124145
const reader = new FileReader();
125146
reader.onload = function(event) {
@@ -238,6 +259,143 @@
238259
reader.readAsDataURL(file);
239260
}
240261

262+
/**
 * Draw an image on the page's canvas with a 0-1000 reference grid and axes,
 * then plot each labelled point on top of it.
 *
 * The canvas is the element with id `canvas`; a textual summary of every
 * plotted point is written into the element with id `boundingBoxImages`
 * (its previous content is cleared). Rendering happens asynchronously once
 * the file has been read and the image decoded.
 *
 * @param {Blob} file - Image file to display; read via FileReader as a data URL.
 * @param {Array<{point: number[], label?: string}>} points - Points to plot.
 *   Each `point` is `[y, x]` on a 0-1000 scale relative to the image height
 *   and width. Entries without two finite numeric coordinates are skipped.
 * @returns {void}
 */
function displayImageWithPoints(file, points) {
  // The image is drawn inset from the canvas origin to leave room for the
  // axis tick labels on the left and the top margin.
  const offsetX = 80;
  const offsetY = 20;
  const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];

  // Skip entries that cannot be plotted so one bad item does not abort the
  // whole render part-way through (leaving the axes undrawn).
  const drawablePoints = (Array.isArray(points) ? points : []).filter((item) =>
    item !== null &&
    typeof item === 'object' &&
    Array.isArray(item.point) &&
    item.point.length >= 2 &&
    item.point.slice(0, 2).every((coord) => typeof coord === 'number' && Number.isFinite(coord))
  );

  const reader = new FileReader();
  reader.onload = function(event) {
    const image = new Image();
    image.onload = function() {
      const canvas = document.getElementById('canvas');
      canvas.width = image.width + 100;
      canvas.height = image.height + 100;
      const ctx = canvas.getContext('2d');

      // Draw the image
      ctx.drawImage(image, offsetX, offsetY);

      // Draw grid lines every 100 units of the 0-1000 scale
      ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; // Red with 50% opacity
      ctx.lineWidth = 1;
      for (let i = 0; i <= 1000; i += 100) {
        const gridX = offsetX + i / 1000 * image.width;
        ctx.beginPath();
        ctx.moveTo(gridX, offsetY);
        ctx.lineTo(gridX, image.height + offsetY);
        ctx.stroke();

        const gridY = offsetY + i / 1000 * image.height;
        ctx.beginPath();
        ctx.moveTo(offsetX, gridY);
        ctx.lineTo(image.width + offsetX, gridY);
        ctx.stroke();
      }

      // Draw points
      const boundingBoxImages = document.getElementById('boundingBoxImages');
      boundingBoxImages.innerHTML = ''; // Clear previous content

      drawablePoints.forEach((item, index) => {
        const [y, x] = item.point.map(coord => coord / 1000);
        const label = item.label || '';

        // Calculate pixel position
        const pixelX = x * image.width + offsetX;
        const pixelY = y * image.height + offsetY;

        const color = colors[index % colors.length];

        // Draw point as a filled circle with a white outline
        ctx.fillStyle = color;
        ctx.strokeStyle = '#FFFFFF';
        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.arc(pixelX, pixelY, 8, 0, 2 * Math.PI);
        ctx.fill();
        ctx.stroke();

        // Draw label background, sized from the measured text
        ctx.font = 'bold 16px Arial';
        const textWidth = ctx.measureText(label).width;
        const textHeight = 16;
        const padding = 4;

        // Position label to the right of the point
        const labelX = pixelX + 12;
        const labelY = pixelY - 8;

        ctx.fillStyle = 'rgba(0, 0, 0, 0.7)';
        ctx.fillRect(labelX, labelY, textWidth + padding * 2, textHeight + padding);

        // Draw label text
        ctx.fillStyle = '#FFFFFF';
        ctx.textAlign = 'left';
        ctx.textBaseline = 'top';
        ctx.fillText(label, labelX + padding, labelY + padding);

        // Add point info to the list below
        const pointContainer = document.createElement('div');
        pointContainer.className = 'bounding-box-container';

        const title = document.createElement('p');
        title.style.color = color;
        title.textContent = `Point: [${item.point.join(', ')}] - Label: ${label}`;
        pointContainer.appendChild(title);

        boundingBoxImages.appendChild(pointContainer);
      });

      // Draw axes and labels
      ctx.strokeStyle = '#000000';
      ctx.lineWidth = 1;
      ctx.font = '26px Arial';
      ctx.textAlign = 'right';
      // The point labels above switch the baseline to 'top'; restore the
      // canvas default so the tick-label offsets below position the text
      // relative to its alphabetic baseline rather than its top edge.
      ctx.textBaseline = 'alphabetic';

      // Y-axis
      ctx.beginPath();
      ctx.moveTo(offsetX, offsetY);
      ctx.lineTo(offsetX, image.height + offsetY);
      ctx.stroke();

      // Y-axis labels and ticks
      for (let i = 0; i <= 1000; i += 100) {
        const tickY = offsetY + i / 1000 * image.height;
        ctx.fillStyle = '#000000';
        ctx.fillText(i.toString(), offsetX - 5, tickY + 5);
        ctx.beginPath();
        ctx.moveTo(offsetX - 5, tickY);
        ctx.lineTo(offsetX, tickY);
        ctx.stroke();
      }

      // X-axis
      ctx.beginPath();
      ctx.moveTo(offsetX, image.height + offsetY);
      ctx.lineTo(image.width + offsetX, image.height + offsetY);
      ctx.stroke();

      // X-axis labels and ticks
      ctx.textAlign = 'center';
      for (let i = 0; i <= 1000; i += 100) {
        const tickX = offsetX + i / 1000 * image.width;
        ctx.fillStyle = '#000000';
        ctx.fillText(i.toString(), tickX, image.height + 40);
        ctx.beginPath();
        ctx.moveTo(tickX, image.height + offsetY);
        ctx.lineTo(tickX, image.height + offsetY + 5);
        ctx.stroke();
      }
    };
    image.src = event.target.result;
  };
  reader.readAsDataURL(file);
}
398+
241399
function clearImage() {
242400
document.getElementById('imageInput').value = '';
243401
document.getElementById('canvas').getContext('2d').clearRect(0, 0, canvas.width, canvas.height);

0 commit comments

Comments
 (0)