Skip to content

Commit c8a3d3c

Browse files
jdomingr and Juan Dominguez authored
feat(genai): add samples for bounding box and express mode (#10205)
Co-authored-by: Juan Dominguez <[email protected]>
1 parent d0943c0 commit c8a3d3c

File tree

5 files changed

+438
-0
lines changed

5 files changed

+438
-0
lines changed
1.22 MB
Loading
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package genai.boundingbox;
18+
19+
// [START googlegenaisdk_boundingbox_with_txt_img]
20+
21+
import static com.google.genai.types.Type.Known.ARRAY;
22+
import static com.google.genai.types.Type.Known.INTEGER;
23+
import static com.google.genai.types.Type.Known.OBJECT;
24+
import static com.google.genai.types.Type.Known.STRING;
25+
26+
import com.google.genai.Client;
27+
import com.google.genai.types.Content;
28+
import com.google.genai.types.GenerateContentConfig;
29+
import com.google.genai.types.GenerateContentResponse;
30+
import com.google.genai.types.HarmBlockThreshold;
31+
import com.google.genai.types.HarmCategory;
32+
import com.google.genai.types.HttpOptions;
33+
import com.google.genai.types.Part;
34+
import com.google.genai.types.SafetySetting;
35+
import com.google.genai.types.Schema;
36+
import com.google.gson.Gson;
37+
import com.google.gson.reflect.TypeToken;
38+
import java.awt.BasicStroke;
39+
import java.awt.Color;
40+
import java.awt.Font;
41+
import java.awt.Graphics2D;
42+
import java.awt.image.BufferedImage;
43+
import java.io.File;
44+
import java.io.IOException;
45+
import java.io.InputStream;
46+
import java.lang.reflect.Type;
47+
import java.net.URL;
48+
import java.util.Arrays;
49+
import java.util.List;
50+
import java.util.Map;
51+
import javax.imageio.ImageIO;
52+
53+
public class BoundingBoxWithTxtImg {
54+
55+
public static class BoundingBox {
56+
List<Integer> box2d;
57+
String label;
58+
59+
public List<Integer> getBox2d() {
60+
return box2d;
61+
}
62+
63+
public String getLabel() {
64+
return label;
65+
}
66+
}
67+
68+
// Plot bounding boxes on an image and save it to a file.
69+
public static void plotBoundingBoxes(String imageUrl, List<BoundingBox> boundingBoxes)
70+
throws IOException {
71+
URL url = new URL(imageUrl);
72+
BufferedImage image = ImageIO.read(url);
73+
74+
int width = image.getWidth();
75+
int height = image.getHeight();
76+
77+
Graphics2D graphics2D = image.createGraphics();
78+
graphics2D.setStroke(new BasicStroke(4));
79+
graphics2D.setFont(new Font("Arial", Font.PLAIN, 18));
80+
81+
// Define a list of colors to cycle through.
82+
List<Color> colors =
83+
Arrays.asList(
84+
Color.RED,
85+
Color.GREEN,
86+
Color.BLUE,
87+
Color.YELLOW,
88+
Color.CYAN,
89+
Color.MAGENTA,
90+
Color.ORANGE);
91+
92+
for (int i = 0; i < boundingBoxes.size(); i++) {
93+
BoundingBox boundingBox = boundingBoxes.get((i));
94+
List<Integer> box2d = boundingBox.getBox2d();
95+
96+
// Scale normalized coordinates (0-1000) to image dimensions.
97+
int topY = (int) (box2d.get(0) / 1000.0 * height);
98+
int leftX = (int) (box2d.get(1) / 1000.0 * width);
99+
int bottomY = (int) (box2d.get(2) / 1000.0 * height);
100+
int rightX = (int) (box2d.get(3) / 1000.0 * width);
101+
102+
Color color = colors.get(i % colors.size());
103+
graphics2D.setColor(color);
104+
105+
// Draw the rectangle.
106+
graphics2D.drawRect(leftX, topY, rightX - leftX, bottomY - topY);
107+
108+
// Draw the label text.
109+
if (boundingBox.getLabel() != null && !boundingBox.getLabel().isEmpty()) {
110+
graphics2D.drawString(boundingBox.getLabel(), leftX + 8, topY + 20);
111+
}
112+
}
113+
graphics2D.dispose();
114+
115+
// Write the image to a file.
116+
String outputFilePath = "resources/output/bounding-boxes-socks.jpg";
117+
ImageIO.write(image, "jpg", new File(outputFilePath));
118+
System.out.println("Successfully saved image to: " + outputFilePath);
119+
}
120+
121+
public static void main(String[] args) throws IOException {
122+
// TODO(developer): Replace these variables before running the sample.
123+
String model = "gemini-2.5-flash";
124+
generateContent(model);
125+
}
126+
127+
// Shows how to send a multimodal prompt to the model and get a structured JSON response
128+
// containing bounding box data, and then uses that data to draw the boxes on the original
129+
// image, saving it to a new file.
130+
public static String generateContent(String modelId) throws IOException {
131+
// Client Initialization. Once created, it can be reused for multiple requests.
132+
try (Client client =
133+
Client.builder()
134+
.location("global")
135+
.httpOptions(HttpOptions.builder().apiVersion("v1").build())
136+
.vertexAI(true)
137+
.build()) {
138+
139+
String systemInstruction =
140+
"Return bounding boxes as an array with labels.\n"
141+
+ " Never return masks. Limit to 25 objects.\n"
142+
+ " If an object is present multiple times, give each object a unique label\n"
143+
+ " according to its distinct characteristics (colors, size, position, etc..).";
144+
145+
// Define the response schema.
146+
Schema responseSchema =
147+
Schema.builder()
148+
.type(ARRAY)
149+
.items(
150+
Schema.builder()
151+
.type(OBJECT)
152+
.properties(
153+
Map.of(
154+
"box2d",
155+
Schema.builder()
156+
.type(ARRAY)
157+
.items(Schema.builder().type(INTEGER).build())
158+
.build(),
159+
"label",
160+
Schema.builder().type(STRING).build()))
161+
.required("box2d", "label")
162+
.build())
163+
.build();
164+
165+
// Define the GenerateContentConfig and set the response schema.
166+
GenerateContentConfig contentConfig =
167+
GenerateContentConfig.builder()
168+
.systemInstruction(Content.fromParts(Part.fromText(systemInstruction)))
169+
.temperature(0.5F)
170+
.safetySettings(
171+
SafetySetting.builder()
172+
.category(HarmCategory.Known.HARM_CATEGORY_DANGEROUS_CONTENT)
173+
.threshold(HarmBlockThreshold.Known.BLOCK_ONLY_HIGH)
174+
.build())
175+
.responseMimeType("application/json")
176+
.responseSchema(responseSchema)
177+
.build();
178+
179+
String imageUri = "https://storage.googleapis.com/generativeai-downloads/images/socks.jpg";
180+
URL url = new URL(imageUri);
181+
182+
try (InputStream inputStream = url.openStream()) {
183+
byte[] imageBytes = inputStream.readAllBytes();
184+
185+
String prompt =
186+
"Output the positions of the socks with a face."
187+
+ " Label according to position in the image";
188+
189+
GenerateContentResponse response =
190+
client.models.generateContent(
191+
modelId,
192+
Content.fromParts(Part.fromBytes(imageBytes, "image/jpeg"), Part.fromText(prompt)),
193+
contentConfig);
194+
195+
System.out.println(response.text());
196+
// Example response:
197+
// [
198+
// {"box2d": [24, 24, 521, 526], "label": "top left light blue cat face sock"},
199+
// {"box2d": [238, 627, 649, 863], "label": "top right light blue cat face sock"}
200+
// ]
201+
202+
// Use Gson to parse the JSON string into a list of BoundingBox objects.
203+
Gson gson = new Gson();
204+
Type boundingBoxListType = new TypeToken<List<BoundingBox>>() {}.getType();
205+
List<BoundingBox> boundingBoxes = gson.fromJson(response.text(), boundingBoxListType);
206+
207+
// Plot the bounding boxes on the image.
208+
if (boundingBoxes != null) {
209+
plotBoundingBoxes(imageUri, boundingBoxes);
210+
}
211+
212+
return response.text();
213+
}
214+
}
215+
}
216+
}
217+
// [END googlegenaisdk_boundingbox_with_txt_img]
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package genai.expressmode;
18+
19+
// [START googlegenaisdk_vertexai_express_mode]
20+
21+
import com.google.genai.Client;
22+
import com.google.genai.types.GenerateContentConfig;
23+
import com.google.genai.types.GenerateContentResponse;
24+
25+
public class ExpressModeWithApiKey {
26+
27+
public static void main(String[] args) {
28+
// TODO(developer): Replace these variables before running the sample.
29+
String modelId = "gemini-2.5-flash";
30+
String apiKey = "YOUR_API_KEY";
31+
generateContent(modelId, apiKey);
32+
}
33+
34+
// Generates content with Vertex AI Api key.
35+
public static String generateContent(String modelId, String apiKey) {
36+
// Client Initialization. Once created, it can be reused for multiple requests.
37+
try (Client client = Client.builder().apiKey(apiKey).vertexAI(true).build()) {
38+
39+
GenerateContentResponse response =
40+
client.models.generateContent(
41+
modelId, "Explain bubble sort to me.", GenerateContentConfig.builder().build());
42+
43+
System.out.print(response.text());
44+
// Example response:
45+
// Bubble sort is one of the simplest sorting algorithms. It's often used to introduce the
46+
// concept of sorting because its logic is very straightforward.
47+
//
48+
// Imagine you have a list of numbers that you want to put in order, like `[5, 1, 4, 2, 8]`.
49+
// ...
50+
return response.text();
51+
}
52+
}
53+
}
54+
// [END googlegenaisdk_vertexai_express_mode]
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package genai.boundingbox;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static com.google.common.truth.Truth.assertWithMessage;
21+
22+
import java.io.ByteArrayOutputStream;
23+
import java.io.IOException;
24+
import java.io.PrintStream;
25+
import org.junit.After;
26+
import org.junit.Before;
27+
import org.junit.BeforeClass;
28+
import org.junit.Test;
29+
import org.junit.runner.RunWith;
30+
import org.junit.runners.JUnit4;
31+
32+
@RunWith(JUnit4.class)
33+
public class BoundingBoxIT {
34+
35+
private static final String GEMINI_FLASH = "gemini-2.5-flash";
36+
37+
private ByteArrayOutputStream bout;
38+
private PrintStream out;
39+
40+
// Check if the required environment variables are set.
41+
public static void requireEnvVar(String envVarName) {
42+
assertWithMessage(String.format("Missing environment variable '%s' ", envVarName))
43+
.that(System.getenv(envVarName))
44+
.isNotEmpty();
45+
}
46+
47+
@BeforeClass
48+
public static void checkRequirements() {
49+
requireEnvVar("GOOGLE_CLOUD_PROJECT");
50+
}
51+
52+
@Before
53+
public void setUp() {
54+
bout = new ByteArrayOutputStream();
55+
out = new PrintStream(bout);
56+
System.setOut(out);
57+
}
58+
59+
@After
60+
public void tearDown() {
61+
System.setOut(null);
62+
}
63+
64+
@Test
65+
public void testBoundingBoxWithTxtImg() throws IOException {
66+
String response = BoundingBoxWithTxtImg.generateContent(GEMINI_FLASH);
67+
assertThat(response).isNotEmpty();
68+
String output = bout.toString();
69+
assertThat(output).contains("box2d");
70+
assertThat(output).contains("label");
71+
assertThat(output)
72+
.contains("Successfully saved image to: resources/output/bounding-boxes-socks.jpg");
73+
}
74+
}

0 commit comments

Comments
 (0)