9 changes: 9 additions & 0 deletions FirebaseVertexAI/CHANGELOG.md
@@ -1,3 +1,12 @@
# Unreleased
- [added] **Public Preview**: Added support for specifying response modalities
in `GenerationConfig`. This includes **public experimental** support for image
generation using Gemini 2.0 Flash (`gemini-2.0-flash-exp`). (#14658)
<br /><br />
Note: This feature is in Public Preview (and relies on experimental models),
which means that it is not subject to any SLA or deprecation policy and could
change in backwards-incompatible ways.

# 11.11.0
- [added] Emits a warning when attempting to use an incompatible model with
`GenerativeModel` or `ImagenModel`. (#14610)
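For illustration, a minimal sketch of the API this entry describes, assuming a configured default Firebase app and the standard `VertexAI.vertexAI()` entry point; the model name is the experimental one called out above, and other models may reject or ignore the image modality.

```swift
import FirebaseVertexAI

// Public Preview: ask for both text and image output in responses.
let generationConfig = GenerationConfig(
  responseModalities: [.text, .image]
)

// Image output currently requires the experimental model named in the entry above.
let model = VertexAI.vertexAI().generativeModel(
  modelName: "gemini-2.0-flash-exp",
  generationConfig: generationConfig
)
```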
16 changes: 15 additions & 1 deletion FirebaseVertexAI/Sources/GenerationConfig.swift
@@ -48,6 +48,9 @@ public struct GenerationConfig: Sendable {
/// Output schema of the generated candidate text.
let responseSchema: Schema?

/// Supported modalities of the response.
let responseModalities: [ResponseModality]?

/// Creates a new `GenerationConfig` value.
///
/// See the
@@ -140,11 +143,20 @@ public struct GenerationConfig: Sendable {
/// [Generate structured
/// output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
/// for more details.
/// - responseModalities: The data types (modalities) that may be returned in model responses.
///
/// See the [multimodal
/// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
/// documentation for more details.
///
/// > Warning: Specifying response modalities is a **Public Preview** feature, which means
/// > that it is not subject to any SLA or deprecation policy and could change in
/// > backwards-incompatible ways.
public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
stopSequences: [String]? = nil, responseMIMEType: String? = nil,
responseSchema: Schema? = nil) {
responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
// Explicit init because otherwise if we re-arrange the above variables it changes the API
// surface.
self.temperature = temperature
@@ -157,6 +169,7 @@ public struct GenerationConfig: Sendable {
self.stopSequences = stopSequences
self.responseMIMEType = responseMIMEType
self.responseSchema = responseSchema
self.responseModalities = responseModalities
}
}

@@ -175,5 +188,6 @@ extension GenerationConfig: Encodable {
case stopSequences
case responseMIMEType = "responseMimeType"
case responseSchema
case responseModalities
}
}
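For reference, a sketch of the wire format these coding keys produce, assuming the `Encodable` conformance above is reachable from the call site; the expected JSON mirrors what the unit test at the end of this diff asserts, including the `responseMimeType` casing that differs from the Swift `responseMIMEType` label.

```swift
import Foundation
import FirebaseVertexAI

// Illustrative only: encode a config that sets the new field.
let config = GenerationConfig(
  responseMIMEType: "text/plain",
  responseModalities: [.text, .image]
)

let encoder = JSONEncoder()
encoder.outputFormatting = [.sortedKeys]
let json = String(data: try! encoder.encode(config), encoding: .utf8)!
// {"responseMimeType":"text/plain","responseModalities":["TEXT","IMAGE"]}
print(json)
```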
52 changes: 52 additions & 0 deletions FirebaseVertexAI/Sources/Types/Public/ResponseModality.swift
@@ -0,0 +1,52 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// Represents the different types, or modalities, of data that a model can produce as output.
///
/// To configure the desired output modalities for model requests, set the `responseModalities`
/// parameter when initializing a ``GenerationConfig``. See the [multimodal
/// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
/// documentation for more details.
///
/// > Important: Support for each response modality, or combination of modalities, depends on the
/// > model.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ResponseModality: EncodableProtoEnum, Sendable {
enum Kind: String {
case text = "TEXT"
case image = "IMAGE"
}

/// Specifies that the model should generate textual content.
///
/// Use this modality when you need the model to produce written language, such as answers to
/// questions, summaries, creative writing, code snippets, or structured data formats like JSON.
public static let text = ResponseModality(kind: .text)

/// **Public Experimental**: Specifies that the model should generate image data.
///
/// Use this modality when you want the model to create visual content based on the provided input
/// or prompts. The response might contain one or more generated images. See the [image
/// generation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation#image-generation)
/// documentation for more details.
///
/// > Warning: Image generation using Gemini 2.0 Flash is a **Public Experimental** feature, which
/// > means that it is not subject to any SLA or deprecation policy and could change in
/// > backwards-incompatible ways.
public static let image = ResponseModality(kind: .image)

let rawValue: String
}
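A sketch of consuming a mixed-modality response, assuming a model configured with `responseModalities: [.text, .image]` as in the earlier example; `TextPart` and `InlineDataPart` are the existing part types, and image output arrives as inline bytes (PNG in the integration test further down).

```swift
import FirebaseVertexAI

// Sketch: print each part of the first candidate, whatever its modality.
func printParts(from prompt: String, using model: GenerativeModel) async throws {
  let response = try await model.generateContent(prompt)
  guard let candidate = response.candidates.first else { return }
  for part in candidate.content.parts {
    switch part {
    case let textPart as TextPart:
      print("text: \(textPart.text)")
    case let imagePart as InlineDataPart:
      print("image (\(imagePart.mimeType)), \(imagePart.data.count) bytes")
    default:
      print("unhandled part type: \(type(of: part))")
    }
  }
}
```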
1 change: 1 addition & 0 deletions FirebaseVertexAI/Tests/TestApp/Sources/Constants.swift
@@ -23,4 +23,5 @@ public enum FirebaseAppNames {
public enum ModelNames {
public static let gemini2Flash = "gemini-2.0-flash-001"
public static let gemini2FlashLite = "gemini-2.0-flash-lite-001"
public static let gemini2FlashExperimental = "gemini-2.0-flash-exp"
}
@@ -19,6 +19,12 @@ import FirebaseVertexAI
import Testing
import VertexAITestApp

#if canImport(UIKit)
import UIKit
#endif // canImport(UIKit)

@testable import struct FirebaseVertexAI.BackendError

@Suite(.serialized)
struct GenerateContentIntegrationTests {
// Set temperature, topP and topK to lowest allowed values to make responses more deterministic.
@@ -119,6 +125,51 @@ struct GenerateContentIntegrationTests {
#expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
}

@Test(arguments: [
InstanceConfig.vertexV1Beta,
InstanceConfig.developerV1Beta,
])
func generateImage(_ config: InstanceConfig) async throws {
let generationConfig = GenerationConfig(
temperature: 0.0,
topP: 0.0,
topK: 1,
responseModalities: [.text, .image]
)
let model = VertexAI.componentInstance(config).generativeModel(
modelName: ModelNames.gemini2FlashExperimental,
generationConfig: generationConfig,
safetySettings: safetySettings
)
let prompt = "Generate an image of a cute cartoon kitten playing with a ball of yarn."

var response: GenerateContentResponse?
try await withKnownIssue(
"Backend may fail with a 503 - Service Unavailable error when overloaded",
isIntermittent: true
) {
response = try await model.generateContent(prompt)
} matching: { issue in
(issue.error as? BackendError).map { $0.httpResponseCode == 503 } ?? false
}

guard let response else { return }
let candidate = try #require(response.candidates.first)
let inlineDataPart = try #require(candidate.content.parts
.first { $0 is InlineDataPart } as? InlineDataPart)
#expect(inlineDataPart.mimeType == "image/png")
#expect(inlineDataPart.data.count > 0)
#if canImport(UIKit)
let uiImage = try #require(UIImage(data: inlineDataPart.data))
// Gemini 2.0 Flash Experimental returns images sized to fit within a 1024x1024 pixel box but
// dimensions may vary depending on the aspect ratio.
#expect(uiImage.size.width <= 1024)
#expect(uiImage.size.width >= 500)
#expect(uiImage.size.height <= 1024)
#expect(uiImage.size.height >= 500)
#endif // canImport(UIKit)
}

// MARK: Streaming Tests

@Test(arguments: InstanceConfig.allConfigs)
7 changes: 6 additions & 1 deletion FirebaseVertexAI/Tests/Unit/GenerationConfigTests.swift
@@ -61,7 +61,8 @@ final class GenerationConfigTests: XCTestCase {
frequencyPenalty: frequencyPenalty,
stopSequences: stopSequences,
responseMIMEType: responseMIMEType,
responseSchema: .array(items: .string())
responseSchema: .array(items: .string()),
responseModalities: [.text, .image]
)

let jsonData = try encoder.encode(generationConfig)
@@ -74,6 +75,10 @@ final class GenerationConfigTests: XCTestCase {
"maxOutputTokens" : \(maxOutputTokens),
"presencePenalty" : \(presencePenalty),
"responseMimeType" : "\(responseMIMEType)",
"responseModalities" : [
"TEXT",
"IMAGE"
],
"responseSchema" : {
"items" : {
"nullable" : false,