Skip to content

Commit b5597fc

Browse files
Force structured response from model when requesting video metadata. (#80)
Co-authored-by: JolandaVerhoef <[email protected]>
1 parent eb7f2e1 commit b5597fc

File tree

18 files changed

+922
-383
lines changed

18 files changed

+922
-383
lines changed

ai-catalog/gradle/libs.versions.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[versions]
22
agp = "8.8.2"
33
coilCompose = "3.1.0"
4-
firebaseBom = "33.14.0"
4+
firebaseBom = "34.1.0"
55
lifecycleRuntimeCompose = "2.9.1"
66
mlkitGenAi = "1.0.0-beta1"
77
kotlin = "2.1.0"
@@ -29,7 +29,6 @@ uiToolingPreviewAndroid = "1.8.1"
2929
spotless = "7.0.4"
3030
uiToolingPreview = "1.8.3"
3131
uiTooling = "1.8.3"
32-
firebaseAi = "16.2.0"
3332
lifecycleViewmodelAndroid = "2.8.7"
3433
material3 = "1.3.2"
3534
exifinterface = "1.4.1"
@@ -76,7 +75,6 @@ androidx-media3-transformer = { module = "androidx.media3:media3-transformer", v
7675
androidx-ui-tooling-preview-android = { group = "androidx.compose.ui", name = "ui-tooling-preview-android", version.ref = "uiToolingPreviewAndroid" }
7776
ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview", version.ref = "uiToolingPreview" }
7877
ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling", version.ref = "uiTooling" }
79-
google-firebase-ai = { group = "com.google.firebase", name = "firebase-ai", version.ref = "firebaseAi" }
8078
androidx-lifecycle-viewmodel-android = { group = "androidx.lifecycle", name = "lifecycle-viewmodel-android", version.ref = "lifecycleViewmodelAndroid" }
8179
material3 = { group = "androidx.compose.material3", name = "material3", version.ref = "material3" }
8280
androidx-exifinterface = { group = "androidx.exifinterface", name = "exifinterface", version.ref = "exifinterface" }

ai-catalog/samples/gemini-live-todo/build.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ dependencies {
5656
implementation(platform(libs.androidx.compose.bom))
5757
implementation(libs.androidx.material.icons.extended)
5858
implementation(platform(libs.firebase.bom))
59-
implementation(libs.google.firebase.ai)
59+
implementation(libs.firebase.ai)
6060
implementation(libs.androidx.lifecycle.viewmodel.android)
6161
implementation(libs.material3)
6262
implementation(libs.hilt.android)

ai-catalog/samples/gemini-video-metadata-creation/build.gradle.kts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ plugins {
1919
alias(libs.plugins.ksp)
2020
alias(libs.plugins.compose.compiler)
2121
alias(libs.plugins.hilt.plugin)
22+
alias(libs.plugins.jetbrains.kotlin.serialization)
2223
}
2324

2425
android {
@@ -67,6 +68,8 @@ dependencies {
6768
ksp(libs.hilt.compiler)
6869
implementation(platform(libs.firebase.bom))
6970
implementation(libs.firebase.ai)
71+
implementation(libs.kotlinx.serialization.json)
72+
debugImplementation(libs.androidx.ui.tooling)
7073

7174
// Media3 ExoPlayer
7275
implementation(libs.androidx.media3.exoplayer)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.android.ai.samples.geminivideometadatacreation
17+
18+
import android.net.Uri
19+
import androidx.compose.runtime.Composable
20+
import com.android.ai.samples.geminivideometadatacreation.ui.AccountTagsUi
21+
import com.android.ai.samples.geminivideometadatacreation.ui.ErrorUi
22+
import com.google.firebase.Firebase
23+
import com.google.firebase.ai.ai
24+
import com.google.firebase.ai.type.GenerateContentResponse
25+
import com.google.firebase.ai.type.GenerativeBackend
26+
import com.google.firebase.ai.type.Schema
27+
import com.google.firebase.ai.type.content
28+
import com.google.firebase.ai.type.generationConfig
29+
import kotlinx.serialization.Serializable
30+
import kotlinx.serialization.json.Json
31+
32+
/** A list of [AccountTag]s suggested by the model for a video. */
typealias AccountTags = List<AccountTag>

/**
 * A single account suggestion produced by the model.
 *
 * @property tag The account tag (e.g., "@username").
 * @property url The URL to the account's profile (e.g., a YouTube channel URL).
 */
@Serializable
data class AccountTag(
    val tag: String,
    val url: String,
)

/**
 * Schema for the expected JSON output format when generating account tags.
 * It defines an array of objects, where each object has two properties:
 * - "tag": A string representing the account tag (e.g., "@username").
 * - "url": A string representing the URL to the account's profile (e.g., a YouTube channel URL).
 */
private val accountTagsSchema = Schema.array(
    items = Schema.obj(
        mapOf(
            "tag" to Schema.string(),
            "url" to Schema.string("The YouTube profile url for this account"),
        ),
    ),
)

/**
 * A generative model instance configured to interact with the Vertex AI Gemini API
 * for generating account tags.
 *
 * Configuration:
 * - `modelName = "gemini-2.5-flash"`: the underlying Gemini model to use.
 * - `responseMimeType = "application/json"`: the model must answer in JSON.
 * - `responseSchema = accountTagsSchema`: the expected structure of the JSON response,
 *   so the output can be reliably parsed into a list of [AccountTag] objects.
 */
private val accountTagsModel = Firebase.ai(backend = GenerativeBackend.vertexAI())
    .generativeModel(
        modelName = "gemini-2.5-flash",
        // Tell Firebase AI the exact format of the response.
        generationConfig {
            responseMimeType = "application/json"
            responseSchema = accountTagsSchema
        },
    )
79+
80+
/**
 * Calls the Vertex AI Gemini API to generate relevant account tags for the given video.
 * The Gemini API analyzes the video content and the provided prompt to suggest accounts
 * that would be suitable for tagging in the video's description or comments,
 * with the intent of increasing the video's reach and engagement.
 *
 * @param videoUri The URI of the video to generate tags for.
 * @return A composable function that displays the generated tags or an error message.
 */
suspend fun generateAccountTags(videoUri: Uri): @Composable () -> Unit {
    // Run the structured-output model against the video plus our prompt.
    val response = accountTagsModel.generateContent(
        content {
            fileData(videoUri.toString(), "video/mp4")
            text(
                """
                Suggest relevant accounts to tag in the video's description or comments to
                increase its reach and engagement.
                """.trimIndent(),
            )
        },
    )

    // No text means the request was blocked or failed — surface the reason.
    val responseJson = response.text
        ?: return { ErrorUi(response.promptFeedback?.blockReasonMessage) }

    // The response schema guarantees the payload parses as a list of AccountTag.
    val accountTags = Json.decodeFromString<AccountTags>(responseJson)
    return { AccountTagsUi(accountTags) }
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.android.ai.samples.geminivideometadatacreation
17+
18+
import android.net.Uri
19+
import androidx.compose.runtime.Composable
20+
import com.android.ai.samples.geminivideometadatacreation.ui.ChaptersUi
21+
import com.android.ai.samples.geminivideometadatacreation.ui.ErrorUi
22+
import com.google.firebase.Firebase
23+
import com.google.firebase.ai.ai
24+
import com.google.firebase.ai.type.GenerativeBackend
25+
import com.google.firebase.ai.type.Schema
26+
import com.google.firebase.ai.type.content
27+
import com.google.firebase.ai.type.generationConfig
28+
import kotlinx.serialization.Serializable
29+
import kotlinx.serialization.json.Json
30+
31+
/** A list of [Chapter]s describing consecutive segments of a video. */
typealias Chapters = List<Chapter>

/**
 * A single chapter of a video.
 *
 * @property timestamp Chapter start time, in milliseconds.
 * @property title Short descriptive title for the chapter.
 */
@Serializable
data class Chapter(
    val timestamp: Long,
    val title: String,
)

/**
 * Schema defining the structure of the chapters data.
 *
 * The output is an array of objects, where each object represents a chapter with:
 * - `timestamp`: A long value representing the chapter start time in milliseconds.
 * - `title`: A string representing the chapter title.
 */
private val chaptersSchema = Schema.array(
    items = Schema.obj(
        mapOf(
            "timestamp" to Schema.long("chapter start in milliseconds"),
            "title" to Schema.string(),
        ),
    ),
)

/**
 * The configured generative model for creating video chapters.
 *
 * Uses the "gemini-2.5-flash" model and requests a JSON response whose
 * structure is constrained by [chaptersSchema], so the output conforms to
 * the [Chapters] data structure.
 */
private val chaptersModel = Firebase.ai(backend = GenerativeBackend.vertexAI())
    .generativeModel(
        modelName = "gemini-2.5-flash",
        // Tell Firebase AI the exact format of the response.
        generationConfig {
            responseMimeType = "application/json"
            responseSchema = chaptersSchema
        },
    )
72+
73+
/**
 * Generates chapters for a given video URI.
 *
 * Sends the video to a generative AI model that is instructed to produce
 * around 3-7 chapters, each with a timestamp and a descriptive title of at
 * most 3 words. Chapters should be at least 10 seconds long and evenly
 * distributed throughout the video.
 *
 * @param videoUri The URI of the video to generate chapters for.
 * @param onChapterClicked Invoked when a chapter is clicked, with the
 *        timestamp of the clicked chapter.
 * @return A composable function that displays either the chapters UI or an
 *         error message if generation failed.
 */
suspend fun generateChapters(videoUri: Uri, onChapterClicked: (timestamp: Long) -> Unit): @Composable () -> Unit {
    // Run the structured-output model against the video plus our prompt.
    val response = chaptersModel.generateContent(
        content {
            fileData(videoUri.toString(), "video/mp4")
            text(
                """
                Analyze the video and create a list of around 3-7 chapters with timestamps and descriptive titles (of max 3 words).
                Each chapter should be at least 10 seconds long.
                Make sure to evenly divide the chapters over the video.
                """.trimIndent(),
            )
        },
    )

    // No text means the request was blocked or failed — surface the reason.
    val responseJson = response.text
        ?: return { ErrorUi(response.promptFeedback?.blockReasonMessage) }

    // The response schema guarantees the payload parses as a list of Chapter.
    val chapters = Json.decodeFromString<Chapters>(responseJson)
    return { ChaptersUi(chapters, onChapterClicked) }
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.android.ai.samples.geminivideometadatacreation
17+
18+
import android.net.Uri
19+
import androidx.compose.runtime.Composable
20+
import com.android.ai.samples.geminivideometadatacreation.ui.DescriptionUi
21+
import com.android.ai.samples.geminivideometadatacreation.ui.ErrorUi
22+
import com.google.firebase.Firebase
23+
import com.google.firebase.ai.ai
24+
import com.google.firebase.ai.type.GenerativeBackend
25+
import com.google.firebase.ai.type.content
26+
27+
/**
 * Generates a compelling and concise description for a video using a generative AI model.
 *
 * Sends the video at [videoUri] to the "gemini-2.5-flash" model along with a prompt
 * asking for a description of less than 100 words, engaging, accurate, and formatted
 * in HTML with limited styling options (bold, italic, underline, bullet points).
 *
 * If the model successfully generates a description, the returned composable displays
 * it via [DescriptionUi]. If the model returns no text (e.g., the request was blocked
 * by safety filters), the returned composable shows the block reason via [ErrorUi].
 *
 * @param videoUri The URI of the video for which to generate the description.
 * @return A composable function that renders either the generated description or an error message.
 */
suspend fun generateDescription(videoUri: Uri): @Composable () -> Unit {
    val response = Firebase.ai(backend = GenerativeBackend.vertexAI())
        .generativeModel(modelName = "gemini-2.5-flash")
        .generateContent(
            content {
                fileData(videoUri.toString(), "video/mp4")
                // Raw strings do not process escape sequences, so the prompt uses a plain
                // apostrophe ("video's") — a "\'" here would send a literal backslash to the model.
                text(
                    """
                    Provide a compelling and concise description for this video in less than 100 words.
                    Don't assume if you don't know.
                    The description should be engaging and accurately reflect the video's content.
                    You should output your responses in HTML format. Use styling sparingly. You can use the following tags:
                    * Bold: <b>
                    * Italic: <i>
                    * Underline: <u>
                    * Bullet points: <ul>, <li>
                    """.trimIndent(),
                )
            },
        )

    val responseText = response.text
    return if (responseText != null) {
        { DescriptionUi(responseText) }
    } else {
        { ErrorUi(response.promptFeedback?.blockReasonMessage) }
    }
}

0 commit comments

Comments
 (0)