Add developer API integration testing, update docs, and remove audio response modality

andrewheard · andrewheard · commit 0ca33d427c72 · 2025-04-07T17:44:49.000-04:00
diff --git a/FirebaseVertexAI/CHANGELOG.md b/FirebaseVertexAI/CHANGELOG.md
@@ -1,3 +1,12 @@
+# Unreleased
+- [added] **Public Preview**: Added support for specifying response modalities
+  in `GenerationConfig`. This includes **public experimental** support for image
+  generation using Gemini 2.0 Flash (`gemini-2.0-flash-exp`). (#14658)
+  <br /><br />
+  Note: This feature is in Public Preview (and relies on experimental models),
+  which means that it is not subject to any SLA or deprecation policy and could
+  change in backwards-incompatible ways.
+
 # 11.11.0
 - [added] Emits a warning when attempting to use an incompatible model with
   `GenerativeModel` or `ImagenModel`. (#14610)
diff --git a/FirebaseVertexAI/Sources/GenerationConfig.swift b/FirebaseVertexAI/Sources/GenerationConfig.swift
@@ -143,7 +143,15 @@ public struct GenerationConfig: Sendable {
   ///     [Generate structured
   ///     output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
   ///     for more details.
-  ///   - responseModalities: Supported modalities of the response.
+  ///   - responseModalities: The data types (modalities) that may be returned in model responses.
+  ///
+  ///     See the [multimodal
+  ///     responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
+  ///     documentation for more details.
+  ///
+  ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
+  ///     > that it is not subject to any SLA or deprecation policy and could change in
+  ///     > backwards-incompatible ways.
   public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
               candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
               presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
diff --git a/FirebaseVertexAI/Sources/Types/Public/ResponseModality.swift b/FirebaseVertexAI/Sources/Types/Public/ResponseModality.swift
@@ -14,23 +14,39 @@
 
 import Foundation
 
-/// Represents the available response modalities.
+/// Represents the different types, or modalities, of data that a model can produce as output.
+///
+/// To configure the desired output modalities for model requests, set the `responseModalities`
+/// parameter when initializing a ``GenerationConfig``. See the [multimodal
+/// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
+/// documentation for more details.
+///
+/// > Important: Support for each response modality, or combination of modalities, depends on the
+/// > model.
 @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
 public struct ResponseModality: EncodableProtoEnum, Sendable {
   enum Kind: String {
     case text = "TEXT"
     case image = "IMAGE"
-    case audio = "AUDIO"
   }
 
-  /// Text response modality.
+  /// Specifies that the model should generate textual content.
+  ///
+  /// Use this modality when you need the model to produce written language, such as answers to
+  /// questions, summaries, creative writing, code snippets, or structured data formats like JSON.
   public static let text = ResponseModality(kind: .text)
 
-  /// Image response modality.
+  /// **Public Experimental**: Specifies that the model should generate image data.
+  ///
+  /// Use this modality when you want the model to create visual content based on the provided input
+  /// or prompts. The response might contain one or more generated images. See the [image
+  /// generation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation#image-generation)
+  /// documentation for more details.
+  ///
+  /// > Warning: Image generation using Gemini 2.0 Flash is a **Public Experimental** feature, which
+  /// > means that it is not subject to any SLA or deprecation policy and could change in
+  /// > backwards-incompatible ways.
   public static let image = ResponseModality(kind: .image)
 
-  /// Audio response modality.
-  public static let audio = ResponseModality(kind: .audio)
-
   let rawValue: String
 }
diff --git a/FirebaseVertexAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift b/FirebaseVertexAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift
@@ -122,7 +122,10 @@ struct GenerateContentIntegrationTests {
     #expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
   }
 
-  @Test(arguments: [InstanceConfig.vertexV1Beta])
+  @Test(arguments: [
+    InstanceConfig.vertexV1Beta,
+    InstanceConfig.developerV1Beta,
+  ])
   func generateImage(_ config: InstanceConfig) async throws {
     let generationConfig = GenerationConfig(
       temperature: 0.0,