package com.google.firebase.example.ai.send_requests

import android.content.Context
import android.content.res.Resources
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.net.Uri
import android.util.Log
import com.google.firebase.ai.GenerativeModel
import com.google.firebase.ai.type.content
import com.google.firebase.example.ai.R

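/**
 * Snippets that send multimodal prompts (audio, images, PDFs, and video) to a
 * [GenerativeModel], in both non-streaming and streaming form.
 */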
class GenerateMultimodal(
    private val applicationContext: Context,
    private val resources: Resources,
    private val model: GenerativeModel,
) {

    private val TAG = "GenerateMultimodal"

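    /**
     * Sends an audio file (as inline data) together with a text prompt and logs
     * the model's full text response.
     */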
    private suspend fun audioNonStreaming(audioUri: Uri) {
        // [START multimodal_audio_non_streaming]
        val contentResolver = applicationContext.contentResolver

        val inputStream = contentResolver.openInputStream(audioUri)

        if (inputStream != null) { // Check if the audio loaded successfully
            inputStream.use { stream ->
                val bytes = stream.readBytes()

                // Provide a prompt that includes the audio specified above and text
                val prompt = content {
                    inlineData(bytes, "audio/mpeg") // Specify the appropriate audio MIME type
                    text("Transcribe what's said in this audio recording.")
                }

                // To generate text output, call `generateContent` with the prompt
                val response = model.generateContent(prompt)

                // Log the generated text, handling the case where it might be null
                Log.d(TAG, response.text ?: "")
            }
        } else {
            Log.e(TAG, "Error getting input stream for audio.")
            // Handle the error appropriately
        }
        // [END multimodal_audio_non_streaming]
    }

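    /**
     * Sends an audio file (as inline data) together with a text prompt and logs the model's
     * response as it streams in, accumulating the chunks into a full response.
     */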
    private suspend fun audioStreaming(audioUri: Uri) {
        // [START multimodal_audio_streaming]
        val contentResolver = applicationContext.contentResolver

        val inputStream = contentResolver.openInputStream(audioUri)

        if (inputStream != null) { // Check if the audio loaded successfully
            inputStream.use { stream ->
                val bytes = stream.readBytes()

                // Provide a prompt that includes the audio specified above and text
                val prompt = content {
                    inlineData(bytes, "audio/mpeg") // Specify the appropriate audio MIME type
                    text("Transcribe what's said in this audio recording.")
                }

                // To stream generated text output, call `generateContentStream` with the prompt
                var fullResponse = ""
                model.generateContentStream(prompt).collect { chunk ->
                    // Log the generated text, handling the case where it might be null
                    Log.d(TAG, chunk.text ?: "")
                    fullResponse += chunk.text ?: ""
                }
            }
        } else {
            Log.e(TAG, "Error getting input stream for audio.")
            // Handle the error appropriately
        }
        // [END multimodal_audio_streaming]
    }

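    /**
     * Sends two images together with a text prompt and prints the model's full text response.
     */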
    private suspend fun multiImagesNonStreaming() {
        // [START multimodal_images_non_streaming]
        // Load the images from the app/res/drawable/ directory
        val bitmap1: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)
        val bitmap2: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky_eats_pizza)

        // Provide a prompt that includes the images specified above and text
        val prompt = content {
            image(bitmap1)
            image(bitmap2)
            text("What is different between these pictures?")
        }

        // To generate text output, call generateContent with the prompt
        val response = model.generateContent(prompt)
        print(response.text)
        // [END multimodal_images_non_streaming]
    }

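    /**
     * Sends two images together with a text prompt and prints the model's response as it
     * streams in, accumulating the chunks into a full response.
     */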
    private suspend fun multiImagesStreaming() {
        // [START multimodal_images_streaming]
        // Load the images from the app/res/drawable/ directory
        val bitmap1: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)
        val bitmap2: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky_eats_pizza)

        // Provide a prompt that includes the images specified above and text
        val prompt = content {
            image(bitmap1)
            image(bitmap2)
            text("What's different between these pictures?")
        }

        // To stream generated text output, call generateContentStream with the prompt
        var fullResponse = ""
        model.generateContentStream(prompt).collect { chunk ->
            print(chunk.text)
            fullResponse += chunk.text
        }
        // [END multimodal_images_streaming]
    }

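    /**
     * Sends a single image together with a text prompt and prints the model's full text response.
     */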
    private suspend fun oneImageNonStreaming() {
        // [START multimodal_one_image_non_streaming]
        // Loads an image from the app/res/drawable/ directory
        val bitmap: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)

        // Provide a prompt that includes the image specified above and text
        val prompt = content {
            image(bitmap)
            text("What developer tool is this mascot from?")
        }

        // To generate text output, call generateContent with the prompt
        val response = model.generateContent(prompt)
        print(response.text)
        // [END multimodal_one_image_non_streaming]
    }

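    /**
     * Sends a single image together with a text prompt and prints the model's response as it
     * streams in, accumulating the chunks into a full response.
     */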
    private suspend fun oneImageStreaming() {
        // [START multimodal_one_image_streaming]
        // Loads an image from the app/res/drawable/ directory
        val bitmap: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)

        // Provide a prompt that includes the image specified above and text
        val prompt = content {
            image(bitmap)
            text("What developer tool is this mascot from?")
        }

        // To stream generated text output, call generateContentStream with the prompt
        var fullResponse = ""
        model.generateContentStream(prompt).collect { chunk ->
            print(chunk.text)
            fullResponse += chunk.text
        }
        // [END multimodal_one_image_streaming]
    }

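    /**
     * Sends a PDF file (as inline data) together with a text prompt and logs the model's
     * full text response.
     */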
    private suspend fun onePdfNonStreaming(pdfUri: Uri) {
        // [START multimodal_one_pdf_non_streaming]
        val contentResolver = applicationContext.contentResolver

        // Provide the URI for the PDF file you want to send to the model
        val inputStream = contentResolver.openInputStream(pdfUri)

        if (inputStream != null) { // Check if the PDF file loaded successfully
            inputStream.use { stream ->
                // Provide a prompt that includes the PDF file specified above and text
                val prompt = content {
                    inlineData(
                        bytes = stream.readBytes(),
                        mimeType = "application/pdf" // Specify the appropriate PDF file MIME type
                    )
                    text("Summarize the important results in this report.")
                }

                // To generate text output, call `generateContent` with the prompt
                val response = model.generateContent(prompt)

                // Log the generated text, handling the case where it might be null
                Log.d(TAG, response.text ?: "")
            }
        } else {
            Log.e(TAG, "Error getting input stream for file.")
            // Handle the error appropriately
        }
        // [END multimodal_one_pdf_non_streaming]
    }

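    /**
     * Sends a PDF file (as inline data) together with a text prompt and logs the model's
     * response as it streams in, accumulating the chunks into a full response.
     */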
    private suspend fun onePdfStreaming(pdfUri: Uri) {
        // [START multimodal_one_pdf_streaming]
        val contentResolver = applicationContext.contentResolver

        // Provide the URI for the PDF you want to send to the model
        val inputStream = contentResolver.openInputStream(pdfUri)

        if (inputStream != null) { // Check if the PDF file loaded successfully
            inputStream.use { stream ->
                // Provide a prompt that includes the PDF file specified above and text
                val prompt = content {
                    inlineData(
                        bytes = stream.readBytes(),
                        mimeType = "application/pdf" // Specify the appropriate PDF file MIME type
                    )
                    text("Summarize the important results in this report.")
                }

                // To stream generated text output, call `generateContentStream` with the prompt
                var fullResponse = ""
                model.generateContentStream(prompt).collect { chunk ->
                    // Log the generated text, handling the case where it might be null
                    val chunkText = chunk.text ?: ""
                    Log.d(TAG, chunkText)
                    fullResponse += chunkText
                }
            }
        } else {
            Log.e(TAG, "Error getting input stream for file.")
            // Handle the error appropriately
        }
        // [END multimodal_one_pdf_streaming]
    }

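    /**
     * Sends a video file (as inline data) together with a text prompt and logs the model's
     * full text response. A null input stream is silently skipped here.
     */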
    private suspend fun videoNonStreaming(videoUri: Uri) {
        // [START multimodal_video_non_streaming]
        val contentResolver = applicationContext.contentResolver
        contentResolver.openInputStream(videoUri).use { stream ->
            stream?.let {
                val bytes = stream.readBytes()

                // Provide a prompt that includes the video specified above and text
                val prompt = content {
                    inlineData(bytes, "video/mp4")
                    text("What is in the video?")
                }

                // To generate text output, call generateContent with the prompt
                val response = model.generateContent(prompt)
                Log.d(TAG, response.text ?: "")
            }
        }
        // [END multimodal_video_non_streaming]
    }

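    /**
     * Sends a video file (as inline data) together with a text prompt and logs the model's
     * response as it streams in, accumulating the chunks into a full response.
     */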
    private suspend fun videoStreaming(videoUri: Uri) {
        // [START multimodal_video_streaming]
        val contentResolver = applicationContext.contentResolver
        contentResolver.openInputStream(videoUri).use { stream ->
            stream?.let {
                val bytes = stream.readBytes()

                // Provide a prompt that includes the video specified above and text
                val prompt = content {
                    inlineData(bytes, "video/mp4")
                    text("What is in the video?")
                }

                // To stream generated text output, call generateContentStream with the prompt
                var fullResponse = ""
                model.generateContentStream(prompt).collect { chunk ->
                    Log.d(TAG, chunk.text ?: "")
                    fullResponse += chunk.text
                }
            }
        }
        // [END multimodal_video_streaming]
    }
}