Skip to content

Commit 695ee95

Browse files
add audio streaming & pdf examples (#483)
* add audio streaming
* Add pdf examples
* Update samples/rest/text_generation.sh
* remove test.pdf
  Change-Id: Icadde0849a0d358b605e7cfe6ff208d49d639dfb
* use alt=sse for all streaming examples
  Change-Id: I6ee214edcc06827d1e73f7c1fdd3e380e7988896

---------

Co-authored-by: Mark Daoust <[email protected]>
1 parent f8b049f commit 695ee95

File tree

2 files changed

+158
-7
lines changed

2 files changed

+158
-7
lines changed

samples/rest/chat.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:ge
2525

2626
echo "[START chat_streaming]"
2727
# [START chat_streaming]
28-
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY \
28+
# The URL must be quoted: an unquoted "&" is a shell control operator that
# would background curl after "alt=sse" and drop the key parameter and flags.
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
2929
-H 'Content-Type: application/json' \
3030
-X POST \
3131
-d '{
@@ -53,7 +53,7 @@ else
5353
B64FLAGS="-w0"
5454
fi
5555

56-
curl https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY \
56+
# The URL must be quoted: an unquoted "&" is a shell control operator that
# would background curl after "alt=sse" and drop the key parameter and flags.
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
5757
-H 'Content-Type: application/json' \
5858
-X POST \
5959
-d '{

samples/rest/text_generation.sh

Lines changed: 156 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@ MEDIA_DIR=$(realpath ${SCRIPT_DIR}/../../third_party)
66
IMG_PATH=${MEDIA_DIR}/organ.jpg
77
AUDIO_PATH=${MEDIA_DIR}/sample.mp3
88
VIDEO_PATH=${MEDIA_DIR}/Big_Buck_Bunny.mp4
9-
10-
BASE_URL="https://generativelanguage.googleapis.com"
9+
PDF_PATH=${MEDIA_DIR}/test.pdf
1110

1211
if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then
1312
B64FLAGS="--input"
1413
else
1514
B64FLAGS="-w0"
1615
fi
1716

17+
BASE_URL="https://generativelanguage.googleapis.com"
18+
1819
echo "[START text_gen_text_only_prompt]"
1920
# [START text_gen_text_only_prompt]
2021
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
@@ -57,7 +58,7 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:g
5758

5859
echo "[START text_gen_multimodal_one_image_prompt_streaming]"
5960
# [START text_gen_multimodal_one_image_prompt_streaming]
60-
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \
61+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
6162
-H 'Content-Type: application/json' \
6263
-X POST \
6364
-d '{
@@ -125,6 +126,54 @@ echo
125126
jq ".candidates[].content.parts[].text" response.json
126127
# [END text_gen_multimodal_audio]
127128

129+
echo "[START text_gen_multimodal_audio_streaming]"
130+
# [START text_gen_multimodal_audio_streaming]
131+
# Use the File API to upload the audio file so the request can reference it by URI.
132+
MIME_TYPE=$(file -b --mime-type "${AUDIO_PATH}")
133+
NUM_BYTES=$(wc -c < "${AUDIO_PATH}")
134+
DISPLAY_NAME=AUDIO
135+
136+
tmp_header_file=upload-header.tmp
137+
138+
# Initial resumable request defining metadata.
139+
# The upload URL is returned in the response headers, so dump them to a file.
140+
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
141+
-D upload-header.tmp \
142+
-H "X-Goog-Upload-Protocol: resumable" \
143+
-H "X-Goog-Upload-Command: start" \
144+
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
145+
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
146+
-H "Content-Type: application/json" \
147+
-d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null
148+
149+
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
150+
rm "${tmp_header_file}"
151+
152+
# Upload the actual bytes.
153+
curl "${upload_url}" \
154+
-H "Content-Length: ${NUM_BYTES}" \
155+
-H "X-Goog-Upload-Offset: 0" \
156+
-H "X-Goog-Upload-Command: upload, finalize" \
157+
--data-binary "@${AUDIO_PATH}" 2> /dev/null > file_info.json
158+
159+
file_uri=$(jq ".file.uri" file_info.json)
160+
echo file_uri=$file_uri
161+
162+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
163+
-H 'Content-Type: application/json' \
164+
-X POST \
165+
-d '{
166+
"contents": [{
167+
"parts":[
168+
{"text": "Please describe this file."},
169+
{"file_data":{"mime_type": "audio/mpeg", "file_uri": '$file_uri'}}]
170+
}]
171+
}' 2> /dev/null > response.json
172+
173+
cat response.json
174+
echo
175+
# [END text_gen_multimodal_audio_streaming]
176+
128177
echo "[START text_gen_multimodal_video_prompt]"
129178
# [START text_gen_multimodal_video_prompt]
130179
# Use the File API to upload the video data for the API request.
@@ -231,7 +280,7 @@ do
231280
state=$(jq ".file.state" file_info.json)
232281
done
233282

234-
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \
283+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
235284
-H 'Content-Type: application/json' \
236285
-X POST \
237286
-d '{
@@ -244,4 +293,106 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:s
244293

245294
cat response.json
246295
echo
247-
# [END text_gen_multimodal_video_prompt_streaming]
296+
# [END text_gen_multimodal_video_prompt_streaming]
297+
298+
echo "[START text_gen_multimodal_pdf]"
299+
# [START text_gen_multimodal_pdf]
300+
MIME_TYPE=$(file -b --mime-type "${PDF_PATH}")
301+
NUM_BYTES=$(wc -c < "${PDF_PATH}")
302+
DISPLAY_NAME=TEXT
303+
304+
305+
echo $MIME_TYPE
306+
tmp_header_file=upload-header.tmp
307+
308+
# Initial resumable request defining metadata.
309+
# The upload URL is returned in the response headers, so dump them to a file.
310+
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
311+
-D upload-header.tmp \
312+
-H "X-Goog-Upload-Protocol: resumable" \
313+
-H "X-Goog-Upload-Command: start" \
314+
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
315+
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
316+
-H "Content-Type: application/json" \
317+
-d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null
318+
319+
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
320+
rm "${tmp_header_file}"
321+
322+
# Upload the actual bytes.
323+
curl "${upload_url}" \
324+
-H "Content-Length: ${NUM_BYTES}" \
325+
-H "X-Goog-Upload-Offset: 0" \
326+
-H "X-Goog-Upload-Command: upload, finalize" \
327+
--data-binary "@${PDF_PATH}" 2> /dev/null > file_info.json
328+
329+
file_uri=$(jq ".file.uri" file_info.json)
330+
echo file_uri=$file_uri
331+
332+
# Now generate content using that file
333+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
334+
-H 'Content-Type: application/json' \
335+
-X POST \
336+
-d '{
337+
"contents": [{
338+
"parts":[
339+
{"text": "Can you add a few more lines to this poem?"},
340+
{"file_data":{"mime_type": "application/pdf", "file_uri": '$file_uri'}}]
341+
}]
342+
}' 2> /dev/null > response.json
343+
344+
cat response.json
345+
echo
346+
347+
jq ".candidates[].content.parts[].text" response.json
348+
# [END text_gen_multimodal_pdf]
349+
350+
echo "[START text_gen_multimodal_pdf_streaming]"
351+
# [START text_gen_multimodal_pdf_streaming]
352+
MIME_TYPE=$(file -b --mime-type "${PDF_PATH}")
353+
NUM_BYTES=$(wc -c < "${PDF_PATH}")
354+
DISPLAY_NAME=TEXT
355+
356+
357+
echo $MIME_TYPE
358+
tmp_header_file=upload-header.tmp
359+
360+
# Initial resumable request defining metadata.
361+
# The upload URL is returned in the response headers, so dump them to a file.
362+
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
363+
-D upload-header.tmp \
364+
-H "X-Goog-Upload-Protocol: resumable" \
365+
-H "X-Goog-Upload-Command: start" \
366+
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
367+
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
368+
-H "Content-Type: application/json" \
369+
-d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null
370+
371+
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
372+
rm "${tmp_header_file}"
373+
374+
# Upload the actual bytes.
375+
curl "${upload_url}" \
376+
-H "Content-Length: ${NUM_BYTES}" \
377+
-H "X-Goog-Upload-Offset: 0" \
378+
-H "X-Goog-Upload-Command: upload, finalize" \
379+
--data-binary "@${PDF_PATH}" 2> /dev/null > file_info.json
380+
381+
file_uri=$(jq ".file.uri" file_info.json)
382+
echo file_uri=$file_uri
383+
384+
# Now generate content using that file
385+
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
386+
-H 'Content-Type: application/json' \
387+
-X POST \
388+
-d '{
389+
"contents": [{
390+
"parts":[
391+
{"text": "Can you add a few more lines to this poem?"},
392+
{"file_data":{"mime_type": "application/pdf", "file_uri": '$file_uri'}}]
393+
}]
394+
}' 2> /dev/null > response.json
395+
396+
cat response.json
397+
echo
398+
# [END text_gen_multimodal_pdf_streaming]

0 commit comments

Comments
 (0)