Skip to content

Commit 12a7bef

Browse files
committed
[Vertex AI] Add responseModalities to GenerationConfig
1 parent 5003be4 commit 12a7bef

File tree

4 files changed

+80
-1
lines changed

4 files changed

+80
-1
lines changed

FirebaseVertexAI/Sources/GenerationConfig.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ public struct GenerationConfig: Sendable {
4848
/// Output schema of the generated candidate text.
4949
let responseSchema: Schema?
5050

51+
/// Supported modalities of the response.
52+
let responseModalities: [ResponseModality]?
53+
5154
/// Creates a new `GenerationConfig` value.
5255
///
5356
/// See the
@@ -140,11 +143,12 @@ public struct GenerationConfig: Sendable {
140143
/// [Generate structured
141144
/// output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
142145
/// for more details.
146+
/// - responseModalities: Supported modalities of the response.
143147
public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
144148
candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
145149
presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
146150
stopSequences: [String]? = nil, responseMIMEType: String? = nil,
147-
responseSchema: Schema? = nil) {
151+
responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
148152
// Explicit init because otherwise if we re-arrange the above variables it changes the API
149153
// surface.
150154
self.temperature = temperature
@@ -157,6 +161,7 @@ public struct GenerationConfig: Sendable {
157161
self.stopSequences = stopSequences
158162
self.responseMIMEType = responseMIMEType
159163
self.responseSchema = responseSchema
164+
self.responseModalities = responseModalities
160165
}
161166
}
162167

@@ -175,5 +180,6 @@ extension GenerationConfig: Encodable {
175180
case stopSequences
176181
case responseMIMEType = "responseMimeType"
177182
case responseSchema
183+
case responseModalities
178184
}
179185
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import Foundation
16+
17+
/// A modality (kind of content) that can be requested for a model's response.
///
/// Use the static members (`text`, `image`, `audio`) rather than constructing values directly;
/// each one wraps the corresponding `Kind` case.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ResponseModality: EncodableProtoEnum, Sendable {
  /// The known modality values; the raw values are the strings `TEXT`, `IMAGE` and `AUDIO`.
  enum Kind: String {
    case text = "TEXT"
    case image = "IMAGE"
    case audio = "AUDIO"
  }

  /// The underlying string value of this modality.
  let rawValue: String

  /// Text response modality.
  public static let text = Self(kind: .text)

  /// Image response modality.
  public static let image = Self(kind: .image)

  /// Audio response modality.
  public static let audio = Self(kind: .audio)
}

FirebaseVertexAI/Tests/TestApp/Sources/Constants.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@ public enum FirebaseAppNames {
2323
/// Model name strings shared by the test app and its tests.
public enum ModelNames {
  /// Name string for the Gemini 2.0 Flash model (`001` version).
  public static let gemini2Flash: String = "gemini-2.0-flash-001"

  /// Name string for the Gemini 2.0 Flash-Lite model (`001` version).
  public static let gemini2FlashLite: String = "gemini-2.0-flash-lite-001"

  /// Name string for the experimental Gemini 2.0 Flash model.
  public static let gemini2FlashExperimental: String = "gemini-2.0-flash-exp"
}

FirebaseVertexAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ import FirebaseVertexAI
1919
import Testing
2020
import VertexAITestApp
2121

22+
#if canImport(UIKit)
23+
import UIKit
24+
#endif // canImport(UIKit)
25+
2226
@Suite(.serialized)
2327
struct GenerateContentIntegrationTests {
2428
// Set temperature, topP and topK to lowest allowed values to make responses more deterministic.
@@ -118,6 +122,38 @@ struct GenerateContentIntegrationTests {
118122
#expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
119123
}
120124

125+
/// Verifies that a model configured with text and image response modalities returns inline
/// image data when prompted to generate an image.
///
/// - Parameter config: The `InstanceConfig` used to obtain the `VertexAI` instance under test.
/// - Throws: Any error thrown by `generateContent`, or a `#require` failure when the response
///   has no candidate, no `InlineDataPart`, or (where UIKit is available) undecodable image data.
@Test(arguments: [InstanceConfig.vertexV1Beta])
func generateImage(_ config: InstanceConfig) async throws {
  // Lowest sampling values keep the response as deterministic as possible; both text and image
  // modalities are requested.
  let generationConfig = GenerationConfig(
    temperature: 0.0,
    topP: 0.0,
    topK: 1,
    responseModalities: [.text, .image]
  )
  let model = VertexAI.componentInstance(config).generativeModel(
    modelName: ModelNames.gemini2FlashExperimental,
    generationConfig: generationConfig,
    safetySettings: safetySettings
  )
  let prompt = """
  Generate an image of a cute cartoon kitten playing with a ball of yarn. Do not respond with any
  text.
  """

  let response = try await model.generateContent(prompt)

  let candidate = try #require(response.candidates.first)
  // Cast while searching so the type check and the downcast happen in a single step.
  let inlineDataPart = try #require(
    candidate.content.parts.compactMap { $0 as? InlineDataPart }.first
  )
  #expect(inlineDataPart.mimeType == "image/png")
  #expect(!inlineDataPart.data.isEmpty)
  #if canImport(UIKit)
    // Decoding the bytes confirms the payload is a real image, not just non-empty data.
    let uiImage = try #require(UIImage(data: inlineDataPart.data))
    #expect(uiImage.size.width == 1024.0)
    #expect(uiImage.size.height == 1024.0)
  #endif // canImport(UIKit)
}
156+
121157
// MARK: Streaming Tests
122158

123159
@Test(arguments: InstanceConfig.allConfigs)

0 commit comments

Comments
 (0)