Skip to content

Commit 9c476d1

Browse files
Add Video metadata creation sample with Gemini and Media3 (#71)
Add Gemini Video Metadata Creation Sample A new sample demonstrating how to use the Gemini API with Firebase and Media3 to extract metadata from videos. The sample includes: UI for selecting a video from a predefined list or a custom URL. A video player using ExoPlayer to display the selected video. Buttons to trigger different metadata extraction tasks: Thumbnails, Description, Hashtags, Account Tags, Chapters, and Links. Displays the generated text metadata and extracted thumbnail images. Utilizes media3-transformer to extract HDR thumbnails from the video. Includes utility functions for timestamp conversion and managing the video list. Implements a ViewModel to handle the interaction with the Gemini API and manage UI state.
1 parent 84ffde3 commit 9c476d1

File tree

19 files changed

+1175
-2
lines changed

19 files changed

+1175
-2
lines changed

ai-catalog/app/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ dependencies {
8989
implementation(project(":samples:magic-selfie"))
9090
implementation(project(":samples:gemini-video-summarization"))
9191
implementation(project(":samples:gemini-live-todo"))
92+
implementation(project(":samples:gemini-video-metadata-creation"))
9293

9394
testImplementation(libs.junit)
9495
androidTestImplementation(libs.androidx.junit)

ai-catalog/app/src/main/java/com/android/ai/catalog/ui/domain/SampleCatalog.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import com.android.ai.catalog.R
2222
import com.android.ai.samples.geminichatbot.GeminiChatbotScreen
2323
import com.android.ai.samples.geminilivetodo.ui.TodoScreen
2424
import com.android.ai.samples.geminimultimodal.ui.GeminiMultimodalScreen
25+
import com.android.ai.samples.geminivideometadatacreation.VideoMetadataCreationScreen
2526
import com.android.ai.samples.geminivideosummary.VideoSummarizationScreen
2627
import com.android.ai.samples.genai_image_description.GenAIImageDescriptionScreen
2728
import com.android.ai.samples.genai_summarization.GenAISummarizationScreen
@@ -92,6 +93,14 @@ val sampleCatalog = listOf(
9293
tags = listOf(SampleTags.GEMINI_FLASH, SampleTags.FIREBASE, SampleTags.MEDIA3),
9394
needsFirebase = true,
9495
),
96+
SampleCatalogItem(
97+
title = R.string.gemini_video_metadata_creation_sample_title,
98+
description = R.string.gemini_video_metadata_creation_sample_description,
99+
route = "VideoMetadataCreationScreen",
100+
sampleEntryScreen = { VideoMetadataCreationScreen() },
101+
tags = listOf(SampleTags.GEMINI_FLASH, SampleTags.FIREBASE, SampleTags.MEDIA3),
102+
needsFirebase = true,
103+
),
95104
SampleCatalogItem(
96105
title = R.string.gemini_live_todo_title,
97106
description = R.string.gemini_live_todo_description,

ai-catalog/app/src/main/res/values/strings.xml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<string name="genai_image_description_sample_title">Image Description with Gemini Nano</string>
1010
<string name="genai_image_description_sample_description">Generate short descriptions of images on-device with GenAI API powered by Gemini Nano</string>
1111
<string name="genai_writing_assistance_sample_title">Polish text with Gemini Nano</string>
12-
<string name="genai_writing_assistance_sample_description">Proofread and rewrite short content on-device with GenAI API powered by Gemini Nano</string>"
12+
<string name="genai_writing_assistance_sample_description">Proofread and rewrite short content on-device with GenAI API powered by Gemini Nano</string>
1313
<string name="top_bar_title">Android AI Samples</string>
1414
<string name="open_sample_button">Open sample</string>
1515
<string name="imagen_sample_title">Image generation with Imagen</string>
@@ -18,6 +18,8 @@
1818
<string name="magic_selfie_sample_description">Change the background of your selfies with Imagen and the ML Kit Segmentation API</string>
1919
<string name="gemini_video_summarization_sample_title">Video Summarization with Gemini and Firebase</string>
2020
<string name="gemini_video_summarization_sample_description">"Generate a summary of a video (from a cloud URL or Youtube) with Gemini API powered by Firebase"</string>
21+
<string name="gemini_video_metadata_creation_sample_title">Video Metadata Creation with Gemini and Firebase</string>
22+
<string name="gemini_video_metadata_creation_sample_description">"Generate metadata of a video (from a cloud URL or Youtube) with Gemini API powered by Firebase"</string>
2123
<string name="gemini_live_todo_title">Gemini Live Todo</string>
2224
<string name="gemini_live_todo_description">"Simple Todo app using the Gemini Live API to interact with the items in the list"</string>
2325
<string name="firebase_required">Firebase Required</string>

ai-catalog/gradle/libs.versions.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ hiltNavigationCompose = "1.2.0"
2323
ksp = "2.1.0-1.0.29"
2424
runtimeLivedata = "1.7.6"
2525
material3Android = "1.3.1"
26-
media3 = "1.6.1"
26+
media3 = "1.8.0"
2727
firebaseCommonKtx = "21.0.0"
2828
uiToolingPreviewAndroid = "1.8.1"
2929
spotless = "7.0.4"
@@ -70,6 +70,8 @@ hilt-navigation-compose = { group = "androidx.hilt", name = "hilt-navigation-com
7070
androidx-runtime-livedata = { group = "androidx.compose.runtime", name = "runtime-livedata", version.ref = "runtimeLivedata" }
7171
androidx-media3-exoplayer = { module = "androidx.media3:media3-exoplayer", version.ref = "media3" }
7272
androidx-media3-ui = { module = "androidx.media3:media3-ui", version.ref = "media3" }
73+
androidx-media3-ui-compose = { module = "androidx.media3:media3-ui-compose", version.ref = "media3"}
74+
androidx-media3-transformer = { module = "androidx.media3:media3-transformer", version.ref = "media3" }
7375
androidx-ui-tooling-preview-android = { group = "androidx.compose.ui", name = "ui-tooling-preview-android", version.ref = "uiToolingPreviewAndroid" }
7476
ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview", version.ref = "uiToolingPreview" }
7577
ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling", version.ref = "uiTooling" }
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
// Gradle plugins for this sample module. KSP is applied because the Hilt compiler is wired in
// through the `ksp(...)` configuration in the dependencies block below.
plugins {
    alias(libs.plugins.android.library)
    alias(libs.plugins.jetbrains.kotlin.android)
    alias(libs.plugins.ksp)
    alias(libs.plugins.compose.compiler)
    alias(libs.plugins.hilt.plugin)
}

android {
    namespace = "com.android.ai.samples.geminivideometadatacreation"
    compileSdk = 36

    defaultConfig {
        minSdk = 24
        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            // Minification is disabled; the ProGuard files are still registered for the release type.
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro",
            )
        }
    }

    // Java and Kotlin are both compiled against JVM 17.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_17
        targetCompatibility = JavaVersion.VERSION_17
    }
    kotlinOptions {
        jvmTarget = "17"
    }

    buildFeatures {
        compose = true
    }
}

dependencies {

    // AndroidX core
    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.appcompat)
    implementation(libs.androidx.lifecycle.runtime.compose)

    // Jetpack Compose UI
    implementation(platform(libs.androidx.compose.bom))
    implementation(libs.androidx.activity.compose)
    implementation(libs.androidx.material3.android)
    implementation(libs.androidx.material.icons.extended)
    implementation(libs.androidx.ui.tooling.preview.android)

    // Hilt dependency injection
    implementation(libs.hilt.android)
    implementation(libs.hilt.navigation.compose)
    ksp(libs.hilt.compiler)

    // Firebase
    implementation(platform(libs.firebase.bom))
    implementation(libs.firebase.common.ktx)
    implementation(libs.firebase.ai)

    // Media3 ExoPlayer
    implementation(libs.androidx.media3.exoplayer)
    implementation(libs.androidx.media3.ui)
    implementation(libs.androidx.media3.ui.compose)
    implementation(libs.androidx.media3.transformer)
    implementation(libs.kotlinx.coroutines.guava)

    // Instrumented tests
    androidTestImplementation(platform(libs.androidx.compose.bom))
    androidTestImplementation(libs.androidx.junit)
    androidTestImplementation(libs.androidx.espresso.core)
    androidTestImplementation(libs.androidx.ui.test.junit4)
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Add project specific ProGuard rules here.
2+
# You can control the set of applied configuration files using the
3+
# proguardFiles setting in build.gradle.
4+
#
5+
# For more details, see
6+
# http://developer.android.com/guide/developing/tools/proguard.html
7+
8+
# If your project uses WebView with JS, uncomment the following
9+
# and specify the fully qualified class name to the JavaScript interface
10+
# class:
11+
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12+
# public *;
13+
#}
14+
15+
# Uncomment this to preserve the line number information for
16+
# debugging stack traces.
17+
#-keepattributes SourceFile,LineNumberTable
18+
19+
# If you keep the line number information, uncomment this to
20+
# hide the original source file name.
21+
#-renamesourcefileattribute SourceFile
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.android.ai.samples.geminivideometadatacreation
17+
18+
import android.content.Intent
19+
import androidx.compose.foundation.layout.Arrangement
20+
import androidx.compose.foundation.layout.Column
21+
import androidx.compose.foundation.layout.fillMaxHeight
22+
import androidx.compose.foundation.layout.fillMaxWidth
23+
import androidx.compose.foundation.layout.padding
24+
import androidx.compose.material.icons.Icons
25+
import androidx.compose.material.icons.filled.Code
26+
import androidx.compose.material3.AlertDialog
27+
import androidx.compose.material3.Button
28+
import androidx.compose.material3.CircularProgressIndicator
29+
import androidx.compose.material3.ExperimentalMaterial3Api
30+
import androidx.compose.material3.Icon
31+
import androidx.compose.material3.MaterialTheme
32+
import androidx.compose.material3.Scaffold
33+
import androidx.compose.material3.Text
34+
import androidx.compose.material3.TopAppBar
35+
import androidx.compose.material3.TopAppBarDefaults.topAppBarColors
36+
import androidx.compose.runtime.Composable
37+
import androidx.compose.runtime.DisposableEffect
38+
import androidx.compose.runtime.LaunchedEffect
39+
import androidx.compose.runtime.getValue
40+
import androidx.compose.runtime.mutableStateOf
41+
import androidx.compose.runtime.remember
42+
import androidx.compose.runtime.setValue
43+
import androidx.compose.ui.Alignment
44+
import androidx.compose.ui.Modifier
45+
import androidx.compose.ui.platform.LocalContext
46+
import androidx.compose.ui.res.stringResource
47+
import androidx.compose.ui.unit.dp
48+
import androidx.compose.ui.unit.sp
49+
import androidx.core.net.toUri
50+
import androidx.hilt.navigation.compose.hiltViewModel
51+
import androidx.lifecycle.compose.collectAsStateWithLifecycle
52+
import androidx.media3.common.MediaItem
53+
import androidx.media3.exoplayer.ExoPlayer
54+
import com.android.ai.samples.geminivideometadatacreation.player.VideoPlayer
55+
import com.android.ai.samples.geminivideometadatacreation.player.VideoSelectionDropdown
56+
import com.android.ai.samples.geminivideometadatacreation.ui.ButtonGrid
57+
import com.android.ai.samples.geminivideometadatacreation.ui.OutputTextDisplay
58+
import com.android.ai.samples.geminivideometadatacreation.ui.ThumbnailScreen
59+
import com.android.ai.samples.geminivideometadatacreation.util.sampleVideoList
60+
import com.android.ai.samples.geminivideometadatacreation.viewmodel.MetadataCreationState
61+
import com.android.ai.samples.geminivideometadatacreation.viewmodel.MetadataType
62+
import com.android.ai.samples.geminivideometadatacreation.viewmodel.VideoMetadataCreationState
63+
import com.android.ai.samples.geminivideometadatacreation.viewmodel.VideoMetadataCreationViewModel
64+
65+
/**
 * Entry-point composable for the Gemini video metadata creation sample.
 *
 * The screen is laid out as a top app bar (title plus a [SeeCodeButton] action) above a column
 * containing a [VideoSelectionDropdown] fed with [sampleVideoList], a [VideoPlayer] backed by an
 * [ExoPlayer] instance, and the metadata section that hosts the request buttons and the output.
 *
 * The [ExoPlayer] is created once per [LocalContext] value and released when this composable
 * leaves the composition or when the player instance is replaced.
 *
 * @param viewModel provides the UI state and receives user actions; defaults to the
 * Hilt-provided [VideoMetadataCreationViewModel].
 */
@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun VideoMetadataCreationScreen(viewModel: VideoMetadataCreationViewModel = hiltViewModel()) {
    val uiState by viewModel.uiState.collectAsStateWithLifecycle()
    val context = LocalContext.current
    var isDropdownExpanded by remember { mutableStateOf(false) }

    val exoPlayer = remember(context) {
        ExoPlayer.Builder(context).build().apply {
            playWhenReady = true
        }
    }

    // Keyed on the player as well as the URI: if `remember(context)` hands back a new player,
    // the currently selected video must be loaded into it too, not only when the URI changes.
    LaunchedEffect(exoPlayer, uiState.selectedVideoUri) {
        uiState.selectedVideoUri?.let { videoUri ->
            exoPlayer.setMediaItem(MediaItem.fromUri(videoUri))
            exoPlayer.prepare()
        }
    }

    Scaffold(
        topBar = {
            TopAppBar(
                colors = topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(text = stringResource(R.string.video_metadata_creation_title))
                },
                actions = {
                    SeeCodeButton()
                },
            )
        },
    ) { innerPadding ->
        Column(
            modifier = Modifier
                .padding(16.dp)
                .padding(innerPadding)
                .fillMaxHeight(),
            verticalArrangement = Arrangement.spacedBy(8.dp),
        ) {
            VideoSelectionDropdown(
                selectedVideoUri = uiState.selectedVideoUri,
                isDropdownExpanded = isDropdownExpanded,
                videoOptions = sampleVideoList,
                onVideoUriSelected = { uri ->
                    // Select the new video, then reset the metadata state.
                    viewModel.onVideoSelected(uri)
                    viewModel.resetMetadataState()
                },
                onDropdownExpanded = { isDropdownExpanded = it },
            )

            // The player takes a quarter of the column height, the metadata section the rest.
            VideoPlayer(
                player = exoPlayer,
                modifier = Modifier
                    .fillMaxWidth()
                    .weight(0.25f),
            )

            MetadataCreationSection(
                uiState = uiState,
                onDismissError = { viewModel.dismissError() },
                onMetadataTypeClicked = { metadataType ->
                    viewModel.onMetadataTypeSelected(metadataType)
                    viewModel.createMetadata(metadataType)
                },
                modifier = Modifier.weight(0.75f),
            )
        }
    }

    // Release the player when it is replaced or when the screen leaves the composition.
    DisposableEffect(key1 = exoPlayer) {
        onDispose {
            exoPlayer.release()
        }
    }
}
150+
151+
/**
 * Lower part of the screen: the metadata request buttons followed by content that depends on
 * the current [MetadataCreationState].
 *
 * - `InProgress`: a horizontally centered progress indicator.
 * - `Error`: an alert dialog showing the error message; dismissing or confirming it invokes
 *   [onDismissError].
 * - `Success`: the generated text followed by the thumbnail area.
 * - `Idle`: nothing besides the buttons.
 *
 * @param uiState current screen state; supplies the selected type and the creation state.
 * @param onDismissError invoked when the error dialog is dismissed or its button is pressed.
 * @param onMetadataTypeClicked invoked with the type of the button the user pressed.
 * @param modifier applied to the root column.
 */
@Composable
private fun MetadataCreationSection(
    uiState: VideoMetadataCreationState,
    onDismissError: () -> Unit,
    onMetadataTypeClicked: (MetadataType) -> Unit,
    modifier: Modifier = Modifier,
) {
    val creationState = uiState.metadataCreationState

    Column(
        modifier = modifier,
        verticalArrangement = Arrangement.spacedBy(16.dp),
    ) {
        ButtonGrid(
            selectedMetadataType = uiState.selectedMetadataType,
            onMetadataCreationClicked = onMetadataTypeClicked,
        )

        when (creationState) {
            MetadataCreationState.Idle -> {
                // Default state - No button is selected unless explicitly selected
            }

            is MetadataCreationState.InProgress -> {
                CircularProgressIndicator(
                    modifier = Modifier.align(Alignment.CenterHorizontally),
                )
            }

            is MetadataCreationState.Success -> {
                Column(
                    modifier = Modifier.weight(1f),
                    verticalArrangement = Arrangement.spacedBy(16.dp),
                ) {
                    OutputTextDisplay(creationState.metadataText)
                    ThumbnailScreen(thumbnailState = creationState.thumbnailState)
                }
            }

            is MetadataCreationState.Error -> {
                AlertDialog(
                    onDismissRequest = onDismissError,
                    confirmButton = {
                        Button(onClick = onDismissError) {
                            Text("OK")
                        }
                    },
                    title = { Text("Error") },
                    text = { Text(creationState.message) },
                )
            }
        }
    }
}
198+
199+
/**
 * Button that opens this sample's source code on GitHub through an [Intent.ACTION_VIEW] intent.
 *
 * If no installed app can handle the link, the failure is logged and the click is otherwise a
 * no-op, instead of crashing with an unhandled `ActivityNotFoundException`.
 */
@Composable
fun SeeCodeButton() {
    val context = LocalContext.current
    val githubLink =
        "https://github.com/android/ai-samples/tree/main/ai-catalog/samples/gemini-video-metadata-creation"
    Button(
        onClick = {
            val intent = Intent(Intent.ACTION_VIEW, githubLink.toUri())
            try {
                context.startActivity(intent)
            } catch (e: android.content.ActivityNotFoundException) {
                // Devices without a browser (or any other handler for https links) throw here.
                android.util.Log.w("SeeCodeButton", "No activity found to open $githubLink", e)
            }
        },
    ) {
        Icon(Icons.Filled.Code, contentDescription = "See code")
        Text(
            modifier = Modifier.padding(start = 8.dp),
            fontSize = 12.sp,
            text = stringResource(R.string.see_code),
        )
    }
}

0 commit comments

Comments
 (0)