Skip to content

Commit a04fcd1

Browse files
authored
Fix 'argument list too long' error and add couple vision examples (#634)
1 parent 0e5c5f2 commit a04fcd1

File tree

1 file changed

+119
-25
lines changed

1 file changed

+119
-25
lines changed

samples/rest/text_generation.sh

100644100755
Lines changed: 119 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ SCRIPT_DIR=$(dirname "$0")
44
# Resolve the media directory relative to this script. The path is quoted so a
# checkout location containing spaces or glob characters is passed to realpath
# as a single argument.
MEDIA_DIR=$(realpath "${SCRIPT_DIR}/../../third_party")
55

66
IMG_PATH=${MEDIA_DIR}/organ.jpg
7+
IMG_PATH2=${MEDIA_DIR}/Cajun_instruments.jpg
78
AUDIO_PATH=${MEDIA_DIR}/sample.mp3
89
VIDEO_PATH=${MEDIA_DIR}/Big_Buck_Bunny.mp4
910
PDF_PATH=${MEDIA_DIR}/test.pdf
@@ -38,43 +39,136 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:s
3839

3940
echo "[START text_gen_multimodal_one_image_prompt]"
4041
# [START text_gen_multimodal_one_image_prompt]
42+
# Use a temporary file to hold the base64-encoded image data, so the large payload is never passed as a command-line argument (avoids "argument list too long")
43+
TEMP_B64=$(mktemp)
44+
trap 'rm -f "$TEMP_B64"' EXIT
45+
# Encode the image into the temp file. The image path is quoted so it is passed
# as one argument; B64FLAGS is left unquoted as in the original (presumably it
# may be empty or hold several flags -- confirm where it is defined).
base64 $B64FLAGS "$IMG_PATH" > "$TEMP_B64"
46+
47+
# Use a temporary file to hold the JSON payload
48+
TEMP_JSON=$(mktemp)
49+
# Setting a new EXIT trap replaces the previous one, so list every temp file
# created so far; otherwise "$TEMP_B64" would never be removed.
trap 'rm -f "$TEMP_B64" "$TEMP_JSON"' EXIT
50+
51+
cat > "$TEMP_JSON" << EOF
52+
{
53+
"contents": [{
54+
"parts":[
55+
{"text": "Tell me about this instrument"},
56+
{
57+
"inline_data": {
58+
"mime_type":"image/jpeg",
59+
"data": "$(cat "$TEMP_B64")"
60+
}
61+
}
62+
]
63+
}]
64+
}
65+
EOF
66+
4167
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
4268
-H 'Content-Type: application/json' \
4369
-X POST \
44-
-d '{
45-
"contents": [{
46-
"parts":[
47-
{"text": "Tell me about this instrument"},
48-
{
49-
"inline_data": {
50-
"mime_type":"image/jpeg",
51-
"data": "'$(base64 $B64FLAGS $IMG_PATH)'"
52-
}
53-
}
54-
]
55-
}]
56-
}' 2> /dev/null
70+
-d "@$TEMP_JSON" 2> /dev/null
5771
# [END text_gen_multimodal_one_image_prompt]
5872

5973
echo "[START text_gen_multimodal_one_image_prompt_streaming]"
6074
# [START text_gen_multimodal_one_image_prompt_streaming]
75+
cat > "$TEMP_JSON" << EOF
76+
{
77+
"contents": [{
78+
"parts":[
79+
{"text": "Tell me about this instrument"},
80+
{
81+
"inline_data": {
82+
"mime_type":"image/jpeg",
83+
"data": "$(cat "$TEMP_B64")"
84+
}
85+
}
86+
]
87+
}]
88+
}
89+
EOF
90+
6191
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
6292
-H 'Content-Type: application/json' \
6393
-X POST \
64-
-d '{
65-
"contents": [{
94+
-d "@$TEMP_JSON" 2> /dev/null
95+
# [END text_gen_multimodal_one_image_prompt_streaming]
96+
97+
echo "[START text_gen_multimodal_two_image_prompt]"
98+
# [START text_gen_multimodal_two_image_prompt]
99+
# Base64 encode both images into temporary files
100+
TEMP_B64_1=$(mktemp)
101+
TEMP_B64_2=$(mktemp)
102+
# Setting a new EXIT trap replaces the previous one, so the earlier temp files
# ("$TEMP_B64" and "$TEMP_JSON") must be listed again here alongside the new
# ones, or they are left behind when the script exits.
trap 'rm -f "$TEMP_B64" "$TEMP_JSON" "$TEMP_B64_1" "$TEMP_B64_2"' EXIT
103+
base64 $B64FLAGS "$IMG_PATH" > "$TEMP_B64_1"
104+
base64 $B64FLAGS "$IMG_PATH2" > "$TEMP_B64_2"
105+
106+
# Create the JSON payload using the base64 data from both images
107+
cat > "$TEMP_JSON" << EOF
108+
{
109+
"contents": [{
66110
"parts":[
67-
{"text": "Tell me about this instrument"},
68111
{
69-
"inline_data": {
70-
"mime_type":"image/jpeg",
71-
"data": "'$(base64 $B64FLAGS $IMG_PATH)'"
72-
}
112+
"inline_data": {
113+
"mime_type": "image/jpeg",
114+
"data": "$(cat "$TEMP_B64_1")"
115+
}
116+
},
117+
{
118+
"inline_data": {
119+
"mime_type": "image/jpeg",
120+
"data": "$(cat "$TEMP_B64_2")"
121+
}
122+
},
123+
{
124+
"text": "Generate a list of all the objects contained in both images."
73125
}
74126
]
75-
}]
76-
}' 2> /dev/null
77-
# [END text_gen_multimodal_one_image_prompt_streaming]
127+
}]
128+
}
129+
EOF
130+
131+
# Make the API request using the JSON file
132+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
133+
-H 'Content-Type: application/json' \
134+
-X POST \
135+
-d "@$TEMP_JSON" 2> /dev/null > response.json
136+
137+
# Display the response
138+
cat response.json
139+
# [END text_gen_multimodal_two_image_prompt]
140+
141+
echo "[START text_gen_multimodal_one_image_bounding_box_prompt]"
142+
# [START text_gen_multimodal_one_image_bounding_box_prompt]
143+
# Re-use TEMP_B64_2 (from the previous two-image prompt) and TEMP_JSON
144+
145+
# Create the JSON payload for bounding box detection
146+
cat > "$TEMP_JSON" << EOF
147+
{
148+
"contents": [{
149+
"parts":[
150+
{
151+
"inline_data": {
152+
"mime_type": "image/jpeg",
153+
"data": "$(cat "$TEMP_B64_2")"
154+
}
155+
},
156+
{
157+
"text": "Generate bounding boxes for each of the objects in this image in [y_min, x_min, y_max, x_max] format."
158+
}
159+
]
160+
}]
161+
}
162+
EOF
163+
164+
# Make the API request using the JSON file
165+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent?key=$GOOGLE_API_KEY" \
166+
-H 'Content-Type: application/json' \
167+
-X POST \
168+
-d "@$TEMP_JSON" 2> /dev/null > response.json
169+
170+
cat response.json
171+
# [END text_gen_multimodal_one_image_bounding_box_prompt]
78172

79173
echo "[START text_gen_multimodal_audio]"
80174
# [START text_gen_multimodal_audio]
@@ -184,7 +278,7 @@ DISPLAY_NAME=VIDEO
184278
# Initial resumable request defining metadata.
185279
# The upload URL is in the response headers; dump them to a file.
186280
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
187-
-D upload-header.tmp \
281+
-D "${tmp_header_file}" \
188282
-H "X-Goog-Upload-Protocol: resumable" \
189283
-H "X-Goog-Upload-Command: start" \
190284
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
@@ -226,7 +320,7 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:g
226320
-d '{
227321
"contents": [{
228322
"parts":[
229-
{"text": "Please describe this file."},
323+
{"text": "Transcribe the audio from this video, giving timestamps for salient events in the video. Also provide visual descriptions."},
230324
{"file_data":{"mime_type": "video/mp4", "file_uri": '$file_uri'}}]
231325
}]
232326
}' 2> /dev/null > response.json

0 commit comments

Comments
 (0)