Skip to content

Commit d63db72

Browse files
YoungHypo authored and peterfriese committed
[FirebaseAI] Add Multimodal Analysis demos (#1750)
1 parent 0900fc9 commit d63db72

File tree

29 files changed

+1450
-871
lines changed

29 files changed

+1450
-871
lines changed

firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj

Lines changed: 131 additions & 92 deletions
Large diffs are not rendered by default.

firebaseai/FirebaseAIExample/ChatExample/Models/ChatMessage.swift

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,34 @@
1919
#endif
2020
import Foundation
2121
import ConversationKit
22+
import UIKit
2223

2324
public struct ChatMessage: Message {
2425
public let id: UUID = .init()
2526
public var content: String?
26-
public let imageURL: String?
2727
public let participant: Participant
2828
public let error: (any Error)?
2929
public var pending = false
3030
public var groundingMetadata: GroundingMetadata?
31+
public var attachments: [MultimodalAttachment] = []
32+
public var image: UIImage?
33+
// required by the Message protocol, but not used in this app
34+
public var imageURL: String?
3135

3236
public init(content: String? = nil, imageURL: String? = nil, participant: Participant,
33-
error: (any Error)? = nil, pending: Bool = false) {
37+
error: (any Error)? = nil, pending: Bool = false,
38+
attachments: [MultimodalAttachment] = [], image: UIImage? = nil) {
3439
self.content = content
3540
self.imageURL = imageURL
3641
self.participant = participant
3742
self.error = error
3843
self.pending = pending
44+
self.attachments = attachments
45+
self.image = image
3946
}
4047

4148
// Protocol-required initializer
42-
public init(content: String?, imageURL: String?, participant: Participant) {
49+
public init(content: String?, imageURL: String? = nil, participant: Participant) {
4350
self.content = content
4451
self.imageURL = imageURL
4552
self.participant = participant
@@ -58,16 +65,18 @@ extension ChatMessage {
5865
public static func == (lhs: ChatMessage, rhs: ChatMessage) -> Bool {
5966
lhs.id == rhs.id &&
6067
lhs.content == rhs.content &&
61-
lhs.imageURL == rhs.imageURL &&
62-
lhs.participant == rhs.participant
68+
lhs.participant == rhs.participant &&
69+
lhs.image == rhs.image &&
70+
lhs.attachments == rhs.attachments
6371
// intentionally ignore `error`
6472
}
6573

6674
public func hash(into hasher: inout Hasher) {
6775
hasher.combine(id)
6876
hasher.combine(content)
69-
hasher.combine(imageURL)
7077
hasher.combine(participant)
78+
hasher.combine(image)
79+
hasher.combine(attachments)
7180
// intentionally ignore `error`
7281
}
7382
}

firebaseai/FirebaseAIExample/ChatExample/Screens/ChatScreen.swift

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ import SwiftUI
1717
import ConversationKit
1818

1919
struct ChatScreen: View {
20-
let firebaseService: FirebaseAI
20+
let backendType: BackendOption
2121
@StateObject var viewModel: ChatViewModel
2222

23-
init(firebaseService: FirebaseAI, sample: Sample? = nil) {
24-
self.firebaseService = firebaseService
23+
init(backendType: BackendOption, sample: Sample? = nil) {
24+
self.backendType = backendType
2525
_viewModel =
26-
StateObject(wrappedValue: ChatViewModel(firebaseService: firebaseService,
26+
StateObject(wrappedValue: ChatViewModel(backendType: backendType,
2727
sample: sample))
2828
}
2929

@@ -35,9 +35,7 @@ struct ChatScreen: View {
3535
}
3636
.disableAttachments()
3737
.onSendMessage { message in
38-
Task {
39-
await viewModel.sendMessage(message.content ?? "", streaming: true)
40-
}
38+
await viewModel.sendMessage(message.content ?? "", streaming: true)
4139
}
4240
.onError { error in
4341
viewModel.presentErrorDetails = true
@@ -65,5 +63,5 @@ struct ChatScreen: View {
6563
}
6664

6765
#Preview {
68-
ChatScreen(firebaseService: FirebaseAI.firebaseAI())
66+
ChatScreen(backendType: .googleAI)
6967
}

firebaseai/FirebaseAIExample/ChatExample/ViewModels/ChatViewModel.swift

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,27 @@ class ChatViewModel: ObservableObject {
4040

4141
private var model: GenerativeModel
4242
private var chat: Chat
43-
private var stopGenerating = false
4443

4544
private var chatTask: Task<Void, Never>?
4645

4746
private var sample: Sample?
47+
private var backendType: BackendOption
4848

49-
init(firebaseService: FirebaseAI, sample: Sample? = nil) {
49+
init(backendType: BackendOption, sample: Sample? = nil) {
5050
self.sample = sample
51+
self.backendType = backendType
52+
53+
let firebaseService = backendType == .googleAI
54+
? FirebaseAI.firebaseAI(backend: .googleAI())
55+
: FirebaseAI.firebaseAI(backend: .vertexAI())
5156

52-
// create a generative model with sample data
5357
model = firebaseService.generativeModel(
54-
modelName: "gemini-2.0-flash-001",
55-
tools: sample?.tools,
58+
modelName: sample?.modelName ?? "gemini-2.5-flash",
59+
generationConfig: sample?.generationConfig,
5660
systemInstruction: sample?.systemInstruction
5761
)
5862

5963
if let chatHistory = sample?.chatHistory, !chatHistory.isEmpty {
60-
// Initialize with sample chat history if it's available
6164
messages = ChatMessage.from(chatHistory)
6265
chat = model.startChat(history: chatHistory)
6366
} else {
@@ -116,13 +119,14 @@ class ChatViewModel: ObservableObject {
116119
.content = (messages[messages.count - 1].content ?? "") + text
117120
}
118121

119-
if let candidate = chunk.candidates.first {
120-
if let groundingMetadata = candidate.groundingMetadata {
121-
self.messages[self.messages.count - 1].groundingMetadata = groundingMetadata
122+
if let inlineDataPart = chunk.inlineDataParts.first {
123+
if let uiImage = UIImage(data: inlineDataPart.data) {
124+
messages[messages.count - 1].image = uiImage
125+
} else {
126+
print("Failed to convert inline data to UIImage")
122127
}
123128
}
124129
}
125-
126130
} catch {
127131
self.error = error
128132
print(error.localizedDescription)
@@ -160,11 +164,13 @@ class ChatViewModel: ObservableObject {
160164
// replace pending message with backend response
161165
messages[messages.count - 1].content = responseText
162166
messages[messages.count - 1].pending = false
167+
}
163168

164-
if let candidate = response?.candidates.first {
165-
if let groundingMetadata = candidate.groundingMetadata {
166-
self.messages[self.messages.count - 1].groundingMetadata = groundingMetadata
167-
}
169+
if let inlineDataPart = response?.inlineDataParts.first {
170+
if let uiImage = UIImage(data: inlineDataPart.data) {
171+
messages[messages.count - 1].image = uiImage
172+
} else {
173+
print("Failed to convert inline data to UIImage")
168174
}
169175
}
170176
} catch {

firebaseai/FirebaseAIExample/ChatExample/Views/MessageView.swift

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,28 @@ struct MessageContentView: View {
5757
}
5858
.labelStyle(.iconOnly)
5959
}
60-
}
60+
} else {
61+
VStack(alignment: .leading, spacing: 8) {
62+
if message.participant == .user && !message.attachments.isEmpty {
63+
AttachmentPreviewScrollView(attachments: message.attachments)
64+
}
6165

62-
// Grounded Response
63-
else if let groundingMetadata = message.groundingMetadata {
64-
GroundedResponseView(message: message, groundingMetadata: groundingMetadata)
65-
}
66+
if let image = message.image {
67+
Image(uiImage: image)
68+
.resizable()
69+
.aspectRatio(contentMode: .fit)
70+
.frame(maxWidth: 300, maxHeight: 300)
71+
.clipShape(RoundedRectangle(cornerRadius: 8))
72+
}
6673

67-
// Non-grounded response
68-
else {
69-
ResponseTextView(message: message)
74+
// Grounded Response
75+
if let groundingMetadata = message.groundingMetadata {
76+
GroundedResponseView(message: message, groundingMetadata: groundingMetadata)
77+
} else {
78+
// Non-grounded response
79+
ResponseTextView(message: message)
80+
}
81+
}
7082
}
7183
}
7284
}

firebaseai/FirebaseAIExample/ContentView.swift

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,20 @@ import FirebaseAI
1818
enum BackendOption: String, CaseIterable, Identifiable {
1919
case googleAI = "Gemini Developer API"
2020
case vertexAI = "Vertex AI Gemini API"
21-
var id: String { rawValue }
2221

23-
var backendValue: FirebaseAI {
24-
switch self {
25-
case .googleAI:
26-
return FirebaseAI.firebaseAI(backend: .googleAI())
27-
case .vertexAI:
28-
return FirebaseAI.firebaseAI(backend: .vertexAI())
29-
}
30-
}
22+
var id: String { rawValue }
3123
}
3224

3325
struct ContentView: View {
3426
@State private var selectedBackend: BackendOption = .googleAI
35-
@State private var firebaseService: FirebaseAI = FirebaseAI.firebaseAI(backend: .googleAI())
36-
@State private var selectedUseCase: UseCase = .text
27+
@State private var selectedUseCase: UseCase = .all
3728

3829
var filteredSamples: [Sample] {
39-
Sample.samples.filter { $0.useCases.contains(selectedUseCase) }
30+
if selectedUseCase == .all {
31+
return Sample.samples
32+
} else {
33+
return Sample.samples.filter { $0.useCases.contains(selectedUseCase) }
34+
}
4035
}
4136

4237
let columns = [
@@ -102,23 +97,22 @@ struct ContentView: View {
10297
}
10398
.background(Color(.systemGroupedBackground))
10499
.navigationTitle("Firebase AI Logic")
105-
.onChange(of: selectedBackend) { newBackend in
106-
firebaseService = newBackend.backendValue
107-
}
108100
}
109101
}
110102

111103
@ViewBuilder
112104
private func destinationView(for sample: Sample) -> some View {
113105
switch sample.navRoute {
114106
case "ChatScreen":
115-
ChatScreen(firebaseService: firebaseService, sample: sample)
107+
ChatScreen(backendType: selectedBackend, sample: sample)
116108
case "ImagenScreen":
117-
ImagenScreen(firebaseService: firebaseService, sample: sample)
118-
case "PhotoReasoningScreen":
119-
PhotoReasoningScreen(firebaseService: firebaseService)
109+
ImagenScreen(backendType: selectedBackend, sample: sample)
110+
case "MultimodalScreen":
111+
MultimodalScreen(backendType: selectedBackend, sample: sample)
120112
case "FunctionCallingScreen":
121-
FunctionCallingScreen(firebaseService: firebaseService, sample: sample)
113+
FunctionCallingScreen(backendType: selectedBackend, sample: sample)
114+
case "GroundingScreen":
115+
GroundingScreen(backendType: selectedBackend, sample: sample)
122116
default:
123117
EmptyView()
124118
}

firebaseai/FirebaseAIExample/FunctionCallingExample/Screens/FunctionCallingScreen.swift

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ import SwiftUI
1717
import ConversationKit
1818

1919
struct FunctionCallingScreen: View {
20-
let firebaseService: FirebaseAI
20+
let backendType: BackendOption
2121
@StateObject var viewModel: FunctionCallingViewModel
2222

23-
init(firebaseService: FirebaseAI, sample: Sample? = nil) {
24-
self.firebaseService = firebaseService
23+
init(backendType: BackendOption, sample: Sample? = nil) {
24+
self.backendType = backendType
2525
_viewModel =
26-
StateObject(wrappedValue: FunctionCallingViewModel(firebaseService: firebaseService,
26+
StateObject(wrappedValue: FunctionCallingViewModel(backendType: backendType,
2727
sample: sample))
2828
}
2929

@@ -35,9 +35,7 @@ struct FunctionCallingScreen: View {
3535
}
3636
.disableAttachments()
3737
.onSendMessage { message in
38-
Task {
39-
await viewModel.sendMessage(message.content ?? "", streaming: true)
40-
}
38+
await viewModel.sendMessage(message.content ?? "", streaming: true)
4139
}
4240
.onError { error in
4341
viewModel.presentErrorDetails = true
@@ -65,5 +63,5 @@ struct FunctionCallingScreen: View {
6563
}
6664

6765
#Preview {
68-
FunctionCallingScreen(firebaseService: FirebaseAI.firebaseAI())
66+
FunctionCallingScreen(backendType: .googleAI)
6967
}

firebaseai/FirebaseAIExample/FunctionCallingExample/ViewModels/FunctionCallingViewModel.swift

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,19 @@ class FunctionCallingViewModel: ObservableObject {
4444
private var chatTask: Task<Void, Never>?
4545

4646
private var sample: Sample?
47+
private var backendType: BackendOption
4748

48-
init(firebaseService: FirebaseAI, sample: Sample? = nil) {
49+
init(backendType: BackendOption, sample: Sample? = nil) {
4950
self.sample = sample
51+
self.backendType = backendType
52+
53+
let firebaseService = backendType == .googleAI
54+
? FirebaseAI.firebaseAI(backend: .googleAI())
55+
: FirebaseAI.firebaseAI(backend: .vertexAI())
5056

5157
// create a generative model with sample data
5258
model = firebaseService.generativeModel(
53-
modelName: "gemini-2.0-flash-001",
59+
modelName: sample?.modelName ?? "gemini-2.0-flash-001",
5460
tools: sample?.tools,
5561
systemInstruction: sample?.systemInstruction
5662
)
@@ -103,18 +109,24 @@ class FunctionCallingViewModel: ObservableObject {
103109
do {
104110
let responseStream = try chat.sendMessageStream(text)
105111

112+
var functionCalls = [FunctionCallPart]()
113+
106114
for try await chunk in responseStream {
107115
if !chunk.functionCalls.isEmpty {
108-
try await handleFunctionCallsStreaming(chunk)
109-
} else {
110-
if let text = chunk.text {
111-
messages[messages.count - 1]
112-
.content = (messages[messages.count - 1].content ?? "") + text
113-
messages[messages.count - 1].pending = false
114-
}
116+
functionCalls.append(contentsOf: chunk.functionCalls)
117+
}
118+
if let text = chunk.text {
119+
messages[messages.count - 1]
120+
.content = (messages[messages.count - 1].content ?? "") + text
121+
messages[messages.count - 1].pending = false
115122
}
116123
}
117124

125+
// On functionCalls, never keep reading the old stream or call the second API inside the first for-loop.
126+
// Start a NEW stream only after the function response turn is sent.
127+
if !functionCalls.isEmpty {
128+
try await handleFunctionCallsStreaming(functionCalls)
129+
}
118130
} catch {
119131
self.error = error
120132
print(error.localizedDescription)
@@ -168,10 +180,10 @@ class FunctionCallingViewModel: ObservableObject {
168180
}
169181
}
170182

171-
private func handleFunctionCallsStreaming(_ response: GenerateContentResponse) async throws {
183+
private func handleFunctionCallsStreaming(_ functionCalls: [FunctionCallPart]) async throws {
172184
var functionResponses = [FunctionResponsePart]()
173185

174-
for functionCall in response.functionCalls {
186+
for functionCall in functionCalls {
175187
switch functionCall.name {
176188
case "fetchWeather":
177189
guard case let .string(city) = functionCall.args["city"],
@@ -198,7 +210,7 @@ class FunctionCallingViewModel: ObservableObject {
198210
}
199211

200212
if !functionResponses.isEmpty {
201-
let finalResponse = try await chat
213+
let finalResponse = try chat
202214
.sendMessageStream([ModelContent(role: "function", parts: functionResponses)])
203215

204216
for try await chunk in finalResponse {

0 commit comments

Comments (0)