Skip to content

Commit 7cc334d

Browse files
author
Guiners
committed
adding samples, test, lints
1 parent a5d6333 commit 7cc334d

File tree

3 files changed

+119
-104
lines changed

3 files changed

+119
-104
lines changed

genai/bounding-box/boundingbox-with-txt-img.js

Lines changed: 113 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -14,131 +14,144 @@
1414

1515
'use strict';
1616

17-
// [START googlegenaisdk_embeddings_docretrieval_with_txt]
17+
// [START googlegenaisdk_boundingbox_with_txt_img]
1818
const {GoogleGenAI} = require('@google/genai');
1919

20+
const {createCanvas, loadImage} = require('canvas');
21+
const fetch = require('node-fetch');
22+
const fs = require('fs');
23+
2024
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
2125
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';
22-
//todo notworking
26+
27+
/**
 * Downloads an image and returns its bytes encoded as a base64 string.
 *
 * @param {string} uri - URL of the image to download.
 * @returns {Promise<string>} Base64 encoding of the response body.
 * @throws {Error} If the server answers with a non-success HTTP status.
 */
async function fetchImageAsBase64(uri) {
  const response = await fetch(uri);

  // fetch() only rejects on network failures. Without this check the body of
  // an HTTP error response (e.g. a 404 page) would be encoded and returned as
  // if it were the image.
  if (!response.ok) {
    throw new Error(
      `Failed to fetch image ${uri}: ${response.status} ${response.statusText}`
    );
  }

  // arrayBuffer() is part of the standard Response interface, unlike the
  // node-fetch v2 specific buffer() helper.
  const bytes = Buffer.from(await response.arrayBuffer());
  return bytes.toString('base64');
}
32+
33+
/**
 * Draws labelled bounding boxes on top of an image and writes the result to
 * disk as a PNG file.
 *
 * Each `box_2d` is read as [yMin, xMin, yMax, xMax]; every value is divided by
 * 1000 and scaled by the image height (y) or width (x) to get pixel positions.
 *
 * @param {string} imageUri - Image source handed to `loadImage`.
 * @param {Array<{box_2d: number[], label: string}>} boundingBoxes - Boxes to draw.
 * @param {string} [outputPath='output.png'] - Where the annotated PNG is written.
 * @returns {Promise<void>}
 */
async function plotBoundingBoxes(
  imageUri,
  boundingBoxes,
  outputPath = 'output.png'
) {
  console.log('Creating bounding boxes');
  const image = await loadImage(imageUri);
  const {width, height} = image;
  const canvas = createCanvas(width, height);
  const ctx = canvas.getContext('2d');

  ctx.drawImage(image, 0, 0);

  const colors = ['red', 'blue', 'green', 'orange'];

  boundingBoxes.forEach((bbox, i) => {
    const box = bbox && bbox.box_2d;
    // A single malformed entry should not abort the whole plot: destructuring
    // a missing box_2d would throw and leave no output file at all.
    if (
      !Array.isArray(box) ||
      box.length !== 4 ||
      !box.every(value => Number.isFinite(value))
    ) {
      console.warn(
        `Skipping bounding box ${i}: box_2d must be an array of 4 numbers`
      );
      return;
    }

    const [yMin, xMin, yMax, xMax] = box;

    const absYMin = Math.floor((yMin / 1000) * height);
    const absXMin = Math.floor((xMin / 1000) * width);
    const absYMax = Math.floor((yMax / 1000) * height);
    const absXMax = Math.floor((xMax / 1000) * width);

    // Cycle through the palette by the entry's original index so a skipped
    // entry does not shift the colours of the boxes after it.
    const color = colors[i % colors.length];

    ctx.strokeStyle = color;
    ctx.lineWidth = 4;
    ctx.strokeRect(absXMin, absYMin, absXMax - absXMin, absYMax - absYMin);

    // Only draw a caption when there is one to draw.
    if (typeof bbox.label === 'string' && bbox.label !== '') {
      ctx.fillStyle = color;
      ctx.font = '20px Arial';
      ctx.fillText(bbox.label, absXMin + 8, absYMin + 20);
    }
  });

  fs.writeFileSync(outputPath, canvas.toBuffer('image/png'));
  console.log(`Saved output to file: ${outputPath}`);
}
63+
2364
/**
 * Asks a Gemini model to locate objects in a sample image, draws the returned
 * bounding boxes onto the image and returns them.
 *
 * The image is downloaded and sent inline as base64; the response is
 * constrained to JSON matching `boundingBoxSchema`.
 *
 * @param {string} [projectId=GOOGLE_CLOUD_PROJECT] - Google Cloud project ID.
 * @param {string} [location=GOOGLE_CLOUD_LOCATION] - Location used for the client.
 * @returns {Promise<Array<{box_2d: number[], label: string}>>} Parsed boxes.
 * @throws {Error} If the response contains no text or the text is not valid JSON.
 */
async function generateContent(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  const ai = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  const systemInstruction = `
    Return bounding boxes as an array with labels.
    Never return masks. Limit to 25 objects.
    If an object is present multiple times, give each object a unique label
    according to its distinct characteristics (colors, size, position, etc.).
  `;

  const safetySettings = [
    {
      category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
      threshold: 'BLOCK_ONLY_HIGH',
    },
  ];

  const imageUri =
    'https://storage.googleapis.com/generativeai-downloads/images/socks.jpg';
  const base64Image = await fetchImageAsBase64(imageUri);

  // Schema the JSON response must follow: an array of {box_2d, label} objects.
  const boundingBoxSchema = {
    type: 'ARRAY',
    description: 'List of bounding boxes for detected objects',
    items: {
      type: 'OBJECT',
      title: 'BoundingBox',
      description: 'Represents a bounding box with coordinates and label',
      properties: {
        box_2d: {
          type: 'ARRAY',
          description:
            'Bounding box coordinates in format [y_min, x_min, y_max, x_max]',
          items: {
            type: 'INTEGER',
            format: 'int32',
          },
          minItems: '4',
          maxItems: '4',
        },
        label: {
          type: 'STRING',
          description: 'Label describing the object within the bounding box',
        },
      },
      required: ['box_2d', 'label'],
    },
  };

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: [
      {
        role: 'user',
        parts: [
          {
            text: 'Output the positions of the socks with a face. Label according to position in the image.',
          },
          {
            inlineData: {
              data: base64Image,
              mimeType: 'image/jpeg',
            },
          },
        ],
      },
    ],
    config: {
      systemInstruction: systemInstruction,
      safetySettings: safetySettings,
      responseMimeType: 'application/json',
      temperature: 0.5,
      responseSchema: boundingBoxSchema,
    },
  });

  // Read the text through the response's `text` accessor rather than indexing
  // candidates[0].content.parts[0], which throws an opaque TypeError whenever
  // the response has no candidates or parts.
  const responseText = response.text;
  if (!responseText) {
    throw new Error(
      'The model returned no text; cannot extract bounding boxes'
    );
  }

  let boundingBoxes;
  try {
    boundingBoxes = JSON.parse(responseText);
  } catch (err) {
    throw new Error('Failed to parse bounding boxes from model response', {
      cause: err,
    });
  }

  console.log('Bounding boxes:', boundingBoxes);

  await plotBoundingBoxes(imageUri, boundingBoxes);
  return boundingBoxes;
}
140-
// [END googlegenaisdk_embeddings_docretrieval_with_txt]
155+
// [END googlegenaisdk_boundingbox_with_txt_img]
141156

142-
module.exports = {
143-
generateContent,
144-
};
157+
module.exports = {generateContent};

genai/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
"mocha": "^10.0.0",
2424
"sinon": "^18.0.0",
2525
"uuid": "^10.0.0",
26-
"proxyquire": "^2.1.3"
26+
"proxyquire": "^2.1.3",
27+
"canvas": "^3.1.0",
28+
"node-fetch": "^2.7.0"
2729
}
2830
}

genai/test/boundingbox-with-txt-img.test.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ const projectId = process.env.CAIP_PROJECT_ID;
2121
const sample = require('../bounding-box/boundingbox-with-txt-img');
2222

2323
// Integration test for the bounding-box sample: calls the sample with the
// configured project and expects at least one bounding box back.
// The suite callback is synchronous on purpose: Mocha registers tests while
// `describe` runs and does not wait for a promise returned from it. Only the
// `it` callback is async.
describe('boundingbox-with-txt-img', () => {
  it('should return the bounding box', async function () {
    // The sample downloads an image and calls the model, so allow a generous
    // timeout (in milliseconds).
    this.timeout(100000);
    const output = await sample.generateContent(projectId);
    assert(output.length > 0);
  });
});

0 commit comments

Comments
 (0)