Skip to content

Commit baa2ee7

Browse files
authored
Merge pull request #170 from android/github_live
Use Nano to generate image description
2 parents f900499 + 5d16dad commit baa2ee7

File tree

26 files changed

+491
-160
lines changed

26 files changed

+491
-160
lines changed

app/src/main/AndroidManifest.xml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
<?xml version="1.0" encoding="utf-8"?>
2-
<!--
1+
<?xml version="1.0" encoding="utf-8"?><!--
32
Copyright 2025 The Android Open Source Project
43
54
Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,10 +23,11 @@
2423
android:required="true" />
2524

2625
<!-- Androidify can use XR features if they're available; they're not required. -->
27-
<uses-feature android:name="android.software.xr.api.spatial" android:required="false" />
26+
<uses-feature
27+
android:name="android.software.xr.api.spatial"
28+
android:required="false" />
2829

2930
<uses-permission android:name="android.permission.CAMERA" />
30-
<uses-sdk tools:overrideLibrary="com.google.ai.edge.aicore" />
3131

3232
<application
3333
android:name=".AndroidifyApplication"
@@ -68,6 +68,9 @@
6868
<meta-data
6969
android:name="com.android.developers.androidify.startup.FirebaseRemoteConfigInitializer"
7070
android:value="@string/androidx_startup" />
71+
<meta-data
72+
android:name="com.android.developers.androidify.startup.GeminiNanoDownloaderInitializer"
73+
android:value="@string/androidx_startup" />
7174
</provider>
7275

7376
<activity
@@ -84,10 +87,13 @@
8487
<!-- Required deeplink to make the app launchable from the watch -->
8588
<intent-filter>
8689
<action android:name="android.intent.action.VIEW" />
90+
8791
<category android:name="android.intent.category.DEFAULT" />
8892
<category android:name="android.intent.category.BROWSABLE" />
89-
<data android:scheme="androidify"
90-
android:host="launch" />
93+
94+
<data
95+
android:host="launch"
96+
android:scheme="androidify" />
9197
</intent-filter>
9298
</activity>
9399
<!-- need to use Theme.AppCompat -->
@@ -105,4 +111,4 @@
105111
android:value="subject_segmentation" />
106112
</application>
107113

108-
</manifest>
114+
</manifest>

core/network/src/main/java/com/android/developers/androidify/RemoteConfigDataSource.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@ interface RemoteConfigDataSource {
2727
fun isBackgroundVibesFeatureEnabled(): Boolean
2828
fun promptTextVerify(): String
2929
fun promptImageValidation(): String
30+
fun promptImageValidationNano(): String
3031
fun promptImageDescription(): String
32+
fun promptImageDescriptionNano(): String
3133
fun useGeminiNano(): Boolean
34+
fun enabledGeminiNanoModelVersions(): String
3235
fun generateBotPrompt(): String
3336
fun promptImageGenerationWithSkinTone(): String
3437

@@ -77,14 +80,26 @@ class RemoteConfigDataSourceImpl @Inject constructor() : RemoteConfigDataSource
7780
return remoteConfig.getString("prompt_image_validation")
7881
}
7982

83+
override fun promptImageValidationNano(): String {
84+
return remoteConfig.getString("prompt_image_validation_nano")
85+
}
86+
8087
override fun promptImageDescription(): String {
8188
return remoteConfig.getString("prompt_image_description")
8289
}
8390

91+
override fun promptImageDescriptionNano(): String {
92+
return remoteConfig.getString("prompt_image_description_nano")
93+
}
94+
8495
override fun useGeminiNano(): Boolean {
8596
return remoteConfig.getBoolean("use_gemini_nano")
8697
}
8798

99+
override fun enabledGeminiNanoModelVersions(): String {
100+
return remoteConfig.getString("enabled_gemini_nano_model_versions")
101+
}
102+
88103
override fun generateBotPrompt(): String {
89104
return remoteConfig.getString("generate_bot_prompt")
90105
}

core/network/src/main/res/xml/remote_config_defaults.xml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,36 @@
6262
- it cannot contain hate speech or other offensive language
6363
-it cannot contain blood or gore or violence.</value>
6464
</entry>
65+
<entry>
66+
<key>prompt_image_validation_nano</key>
67+
<value>[TASK]
68+
You are a Validator. Analyze the attached image and determine its validity based on the
69+
rules.
70+
[RULES]
71+
VALID if AND ONLY if:
72+
1. PRIMARY subject is a person showing their head and shoulder
73+
2. The image MUST NOT contain: Nudity, Explicit content, Illegal weapons, Violent
74+
references, Drugs, Illicit substances, Hate speech, Offensive language, Blood, Gore, or
75+
Violence.
76+
[OUTPUT]
77+
Return ONLY one string.
78+
Check sequentially. Output the first failure code that applies:
79+
1. Is the PRIMARY subject NOT a person (e.g., animal, object, landscape)? ->
80+
"not_a_person"
81+
2. Is the person present but missing face/head/shoulders or too blurry? ->
82+
"not_enough_detail"
83+
3. Does the image violate any negative policy (Rule 2)? -> "policy_violation"
84+
4. If all rules are passed: -> null
85+
</value>
86+
</entry>
6587
<entry>
6688
<key>use_gemini_nano</key>
6789
<value>false</value>
6890
</entry>
91+
<entry>
92+
<key>enabled_gemini_nano_model_versions</key>
93+
<value>nano-v3</value>
94+
</entry>
6995
<entry>
7096
<key>dancing_droid_gif_link</key>
7197
<value>https://services.google.com/fh/files/misc/android_dancing.gif</value>
@@ -148,6 +174,45 @@
148174
* Do not say rendered, rendering, or digital.
149175
* Only respond with new image description as a paragraph.</value>
150176
</entry>
177+
<entry>
178+
<key>prompt_image_description_nano</key>
179+
<value>## Role
180+
You are an expert image analyst specializing in generating detailed, objective descriptions of people.
181+
182+
## Task
183+
Your task is to describe the person in the provided image in vivid detail, following the guidelines and examples below.
184+
185+
## Guidelines
186+
- Start with the overall mood or impression of the person (e.g., serene, joyful, pensive).
187+
- Describe the person's physical appearance, focusing on hair (color, style, length) and any visible facial features.
188+
- Detail the clothing, including the type of garments, style, color, and material.
189+
- Mention any accessories, such as glasses, hats, or jewelry.
190+
- Describe the immediate surroundings, including any objects, animals, or other people interacting with the subject.
191+
192+
## Constraints
193+
- The output must be a single, coherent paragraph.
194+
- If no person is visible in the image, state that clearly and do not describe anything else.
195+
- Provide only the description. Do not add any introductory or concluding remarks.
196+
197+
## Examples
198+
199+
### Example 1: Standard Case
200+
Input: [Image of a person on a picnic blanket with a dog]
201+
Output: A highly detailed and realistic portrayal of a person with a serene and pleasant mood. The figure has short, chin-length, straight dark black hair. No facial hair is present. Blue mirrored sunglasses are resting on top of its head. The figure is wearing a loose-fitting, light gray kimono-like top with a V-neckline and wide, elbow-length sleeves. This top features intricate, colorful embroidery in muted red, green, and yellow floral patterns on the front and sleeves. On its bottom, the figure wears loose-fitting, light gray wide-leg pants made of a soft, flowing material. No footwear is visible. The figure is seated on a red and white checkered picnic blanket. Next to it on the blanket is a clear plastic bottle. It is interacting with a black and white Pomeranian-like dog, which has black fur with distinct white markings on its chest, legs, and face, and a leash attached to its collar. The overall depiction aims for a clear and life-like appearance.
202+
203+
### Example 2: Corner Case (No Person)
204+
Input: [Image of an empty park bench]
205+
Output: No person is visible in the image.
206+
207+
## Input
208+
{{image}}
209+
210+
## Output Reminder
211+
Take a deep breath, read the instructions again, read the inputs again. Each instruction is crucial and must be executed with utmost care and attention to detail.
212+
213+
Description:
214+
</value>
215+
</entry>
151216
<entry>
152217
<key>promo_video_link</key>
153218
<value>https://services.google.com/fh/files/misc/androidfy_storyboard_b_v07.mp4</value>

core/testing/src/main/AndroidManifest.xml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,4 @@
1717
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
1818
xmlns:tools="http://schemas.android.com/tools">
1919

20-
<uses-sdk tools:overrideLibrary="com.google.ai.edge.aicore"/>
2120
</manifest>

core/testing/src/main/java/com/android/developers/testing/data/TestGeminiNanoDownloader.kt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package com.android.developers.testing.data
2+
3+
import com.android.developers.androidify.data.GeminiNanoDownloader
4+
5+
class TestGeminiNanoDownloader(val modelDownloaded: Boolean) : GeminiNanoDownloader {
6+
override fun isModelDownloaded(): Boolean {
7+
return modelDownloaded
8+
}
9+
}

core/testing/src/main/java/com/android/developers/testing/data/TestGeminiNanoGenerationDataSource.kt

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,27 @@
1515
*/
1616
package com.android.developers.testing.data
1717

18+
import android.graphics.Bitmap
19+
import com.android.developers.androidify.data.GeminiNanoDownloader
1820
import com.android.developers.androidify.data.GeminiNanoGenerationDataSource
21+
import com.android.developers.androidify.model.ValidatedDescription
22+
import com.android.developers.androidify.model.ValidatedImage
1923

20-
class TestGeminiNanoGenerationDataSource(val promptOutput: String?) : GeminiNanoGenerationDataSource {
21-
override suspend fun initialize() {
22-
}
24+
class TestGeminiNanoGenerationDataSource(
25+
val promptOutput: String?,
26+
val geminiNanoDownloader: GeminiNanoDownloader
27+
) : GeminiNanoGenerationDataSource {
2328

2429
override suspend fun generatePrompt(prompt: String): String? {
2530
return promptOutput
2631
}
32+
33+
override suspend fun validateImageHasEnoughInformation(image: Bitmap): ValidatedImage? {
34+
return ValidatedImage(true, null)
35+
}
36+
37+
override suspend fun generateDescriptivePromptFromImage(image: Bitmap): ValidatedDescription? {
38+
if (!geminiNanoDownloader.isModelDownloaded()) return null
39+
return ValidatedDescription(true, "Nano description")
40+
}
2741
}

core/testing/src/main/java/com/android/developers/testing/network/TestFirebaseAiDataSource.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ import com.android.developers.androidify.vertexai.FirebaseAiDataSource
2424

2525
class TestFirebaseAiDataSource(val promptOutput: List<String>) : FirebaseAiDataSource {
2626
override suspend fun validatePromptHasEnoughInformation(inputPrompt: String): ValidatedDescription {
27-
return ValidatedDescription(true, "User description")
27+
return ValidatedDescription(true, "Firebase description")
2828
}
2929

3030
override suspend fun validateImageHasEnoughInformation(image: Bitmap): ValidatedImage {
3131
return ValidatedImage(true, null)
3232
}
3333

3434
override suspend fun generateDescriptivePromptFromImage(image: Bitmap): ValidatedDescription {
35-
return ValidatedDescription(true, "User description")
35+
return ValidatedDescription(true, "Firebase description")
3636
}
3737

3838
override suspend fun generateImageFromPromptAndSkinTone(
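
core/testing/src/main/java/com/android/developers/testing/network/TestLocalSegmentationDataSource.kt

Lines changed: 12 additions & 0 deletions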
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package com.android.developers.testing.network
2+
3+
import android.graphics.Bitmap
4+
import com.android.developers.androidify.ondevice.LocalSegmentationDataSource
5+
import androidx.core.graphics.createBitmap
6+
7+
class TestLocalSegmentationDataSource() : LocalSegmentationDataSource {
8+
9+
override suspend fun removeBackground(bitmap: Bitmap): Bitmap {
10+
return createBitmap(100, 100)
11+
}
12+
}

core/testing/src/main/java/com/android/developers/testing/network/TestRemoteConfigDataSource.kt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,26 @@ class TestRemoteConfigDataSource(private val useGeminiNano: Boolean) : RemoteCon
4242
TODO("Not yet implemented")
4343
}
4444

45+
override fun promptImageValidationNano(): String {
46+
TODO("Not yet implemented")
47+
}
48+
4549
override fun promptImageDescription(): String {
4650
TODO("Not yet implemented")
4751
}
4852

53+
override fun promptImageDescriptionNano(): String {
54+
TODO("Not yet implemented")
55+
}
56+
4957
override fun useGeminiNano(): Boolean {
5058
return useGeminiNano
5159
}
5260

61+
override fun enabledGeminiNanoModelVersions(): String {
62+
TODO("Not yet implemented")
63+
}
64+
5365
override fun generateBotPrompt(): String {
5466
return "generateBotPrompt"
5567
}

core/testing/src/main/java/com/android/developers/testing/repository/FakeImageGenerationRepository.kt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ import android.net.Uri
2020
import androidx.core.graphics.createBitmap
2121
import androidx.core.net.toUri
2222
import com.android.developers.androidify.data.ImageGenerationRepository
23+
import com.android.developers.androidify.model.ValidatedDescription
2324
import java.io.File
2425

2526
class FakeImageGenerationRepository : ImageGenerationRepository {
26-
override suspend fun initialize() {
27-
}
2827
var exceptionToThrow: Exception? = null
2928

3029
override suspend fun generateFromDescription(
@@ -35,6 +34,10 @@ class FakeImageGenerationRepository : ImageGenerationRepository {
3534
return createBitmap(1, 1)
3635
}
3736

37+
override suspend fun getDescriptionFromImage(file: File): ValidatedDescription {
38+
return ValidatedDescription(true, "")
39+
}
40+
3841
override suspend fun generateFromImage(
3942
file: File,
4043
skinTone: String,

0 commit comments

Comments
 (0)