Skip to content

Commit 8a4f04b

Browse files
author
Guiners
committed
bounding-box sample with test
1 parent 62dbb27 commit 8a4f04b

File tree

3 files changed

+188
-0
lines changed

3 files changed

+188
-0
lines changed
Lines changed: 159 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,159 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
'use strict';
16+
17+
// [START googlegenaisdk_boundingbox_with_txt_img]
18+
const {GoogleGenAI} = require('@google/genai');
19+
20+
const {createCanvas, loadImage} = require('canvas');
21+
const fetch = require('node-fetch');
22+
const fs = require('fs');
23+
24+
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
25+
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';
26+
27+
/**
 * Downloads a resource and returns its bytes encoded as a base64 string.
 *
 * @param {string} uri - URL of the resource to download.
 * @returns {Promise<string>} Base64 encoding of the response body.
 * @throws {Error} If the server answers with a non-successful HTTP status.
 */
async function fetchImageAsBase64(uri) {
  const response = await fetch(uri);

  // Without this check, the body of an error page would be base64-encoded
  // and returned as if it were the requested image.
  if (!response.ok) {
    throw new Error(
      `Failed to fetch image from ${uri}: ${response.status} ${response.statusText}`
    );
  }

  // arrayBuffer() is part of the standard Response interface, whereas
  // buffer() is a node-fetch extension.
  const bytes = await response.arrayBuffer();
  return Buffer.from(bytes).toString('base64');
}
32+
33+
/**
 * Draws labelled bounding boxes on top of an image and writes the result to
 * disk as a PNG.
 *
 * @param {string} imageUri - Image source passed to `loadImage`.
 * @param {Array<{box_2d: number[], label: string}>} boundingBoxes - Boxes to
 *     draw. `box_2d` is read as `[yMin, xMin, yMax, xMax]`; each value is
 *     divided by 1000 and scaled by the image height or width.
 * @param {string} [outputPath='output.png'] - File the annotated image is
 *     written to.
 * @returns {Promise<void>}
 */
async function plotBoundingBoxes(
  imageUri,
  boundingBoxes,
  outputPath = 'output.png'
) {
  console.log('Creating bounding boxes');
  const image = await loadImage(imageUri);
  const canvas = createCanvas(image.width, image.height);
  const ctx = canvas.getContext('2d');

  // Paint the source image first so the boxes are drawn over it.
  ctx.drawImage(image, 0, 0);

  const colors = ['red', 'blue', 'green', 'orange'];

  // Converts a coordinate on the 0-1000 scale to a whole pixel offset along
  // an axis that is `size` pixels long.
  const toPixels = (value, size) => Math.floor((value / 1000) * size);

  boundingBoxes.forEach((bbox, i) => {
    const [yMin, xMin, yMax, xMax] = bbox.box_2d;

    const absYMin = toPixels(yMin, image.height);
    const absXMin = toPixels(xMin, image.width);
    const absYMax = toPixels(yMax, image.height);
    const absXMax = toPixels(xMax, image.width);

    // Cycle through the palette so neighbouring boxes get different colors.
    const color = colors[i % colors.length];

    ctx.strokeStyle = color;
    ctx.lineWidth = 4;
    ctx.strokeRect(absXMin, absYMin, absXMax - absXMin, absYMax - absYMin);

    // Place the label just inside the top-left corner of the box.
    ctx.fillStyle = color;
    ctx.font = '20px Arial';
    ctx.fillText(bbox.label, absXMin + 8, absYMin + 20);
  });

  fs.writeFileSync(outputPath, canvas.toBuffer('image/png'));
  console.log(`Saved output to file: ${outputPath}`);
}
63+
64+
/**
 * Asks a Gemini model on Vertex AI to locate objects in a sample image,
 * draws the returned bounding boxes onto the image, and returns them.
 *
 * @param {string} [projectId] - Google Cloud project ID; defaults to the
 *     module-level GOOGLE_CLOUD_PROJECT value.
 * @param {string} [location] - Google Cloud location; defaults to the
 *     module-level GOOGLE_CLOUD_LOCATION value.
 * @returns {Promise<Array<{box_2d: number[], label: string}>>} The parsed
 *     bounding boxes from the model response.
 * @throws {Error} If the response contains no text part to parse.
 * @throws {SyntaxError} If the response text is not valid JSON.
 */
async function createBoundingBox(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const systemInstruction = `
    Return bounding boxes as an array with labels.
    Never return masks. Limit to 25 objects.
    If an object is present multiple times, give each object a unique label
    according to its distinct characteristics (colors, size, position, etc).
  `;

  const safetySettings = [
    {
      category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
      threshold: 'BLOCK_ONLY_HIGH',
    },
  ];

  const imageUri =
    'https://storage.googleapis.com/generativeai-downloads/images/socks.jpg';
  const base64Image = await fetchImageAsBase64(imageUri);

  // Response schema: an array of {box_2d, label} objects, where box_2d holds
  // exactly four integers.
  const boundingBoxSchema = {
    type: 'ARRAY',
    description: 'List of bounding boxes for detected objects',
    items: {
      type: 'OBJECT',
      title: 'BoundingBox',
      description: 'Represents a bounding box with coordinates and label',
      properties: {
        box_2d: {
          type: 'ARRAY',
          description:
            'Bounding box coordinates in format [y_min, x_min, y_max, x_max]',
          items: {
            type: 'INTEGER',
            format: 'int32',
          },
          minItems: '4',
          maxItems: '4',
        },
        label: {
          type: 'STRING',
          description: 'Label describing the object within the bounding box',
        },
      },
      required: ['box_2d', 'label'],
    },
  };

  const response = await client.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: [
      {
        role: 'user',
        parts: [
          {
            text: 'Output the positions of the socks with a face. Label according to position in the image.',
          },
          {
            inlineData: {
              data: base64Image,
              mimeType: 'image/jpeg',
            },
          },
        ],
      },
    ],
    config: {
      systemInstruction: systemInstruction,
      safetySettings: safetySettings,
      responseMimeType: 'application/json',
      temperature: 0.5,
      responseSchema: boundingBoxSchema,
    },
  });

  // A response may carry no candidates or no text part; fail with a clear
  // message instead of a TypeError from reading a property of undefined.
  const candidate = response.candidates?.[0]?.content?.parts?.[0]?.text;
  if (!candidate) {
    throw new Error(
      'Model response contained no text part; cannot extract bounding boxes.'
    );
  }
  const boundingBoxes = JSON.parse(candidate);

  console.log('Bounding boxes:', boundingBoxes);

  await plotBoundingBoxes(imageUri, boundingBoxes);
  return boundingBoxes;
}
155+
// [END googlegenaisdk_boundingbox_with_txt_img]
156+
157+
module.exports = {
158+
createBoundingBox,
159+
};

genai/output-folder/output.png

13.6 MB
Loading
Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
'use strict';
16+
17+
const {assert} = require('chai');
18+
const {describe, it} = require('mocha');
19+
20+
const projectId = process.env.CAIP_PROJECT_ID;
21+
const sample = require('../bounding-box/boundingbox-with-txt-img');
22+
23+
// The suite callback is deliberately synchronous: Mocha registers tests while
// the callback runs and does not wait on a promise returned from `describe`.
describe('boundingbox-with-txt-img', () => {
  it('should return the bounding box', async function () {
    // The sample downloads an image and calls the model, so it needs far
    // longer than Mocha's default timeout.
    this.timeout(100000);
    const output = await sample.createBoundingBox(projectId);
    assert(output.length > 0);
  });
});

0 commit comments

Comments
 (0)