diff --git a/FirebaseAI/CHANGELOG.md b/FirebaseAI/CHANGELOG.md index fcd35a32e43..4c2d482f8d5 100644 --- a/FirebaseAI/CHANGELOG.md +++ b/FirebaseAI/CHANGELOG.md @@ -1,3 +1,8 @@ +# 12.3.0 +- [fixed] Fixed a decoding error when generating images with the + `gemini-2.5-flash-image-preview` model using `generateContentStream` or + `sendMessageStream` with the Gemini Developer API. (#15262) + # 12.2.0 - [feature] Added support for returning thought summaries, which are synthesized versions of a model's internal reasoning process. (#15096) diff --git a/FirebaseAI/Sources/AILog.swift b/FirebaseAI/Sources/AILog.swift index 4019c2cd0ff..cae85a0ff0a 100644 --- a/FirebaseAI/Sources/AILog.swift +++ b/FirebaseAI/Sources/AILog.swift @@ -62,11 +62,13 @@ enum AILog { case decodedInvalidCitationPublicationDate = 3011 case generateContentResponseUnrecognizedContentModality = 3012 case decodedUnsupportedImagenPredictionType = 3013 + case decodedUnsupportedPartData = 3014 // SDK State Errors case generateContentResponseNoCandidates = 4000 case generateContentResponseNoText = 4001 case appCheckTokenFetchFailed = 4002 + case generateContentResponseEmptyCandidates = 4003 // SDK Debugging case loadRequestStreamResponseLine = 5000 diff --git a/FirebaseAI/Sources/GenerateContentResponse.swift b/FirebaseAI/Sources/GenerateContentResponse.swift index 0756d2afd9a..015d5dae56c 100644 --- a/FirebaseAI/Sources/GenerateContentResponse.swift +++ b/FirebaseAI/Sources/GenerateContentResponse.swift @@ -163,6 +163,12 @@ public struct Candidate: Sendable { self.citationMetadata = citationMetadata self.groundingMetadata = groundingMetadata } + + // Returns `true` if the candidate contains no information that a developer could use. + var isEmpty: Bool { + content.parts + .isEmpty && finishReason == nil && citationMetadata == nil && groundingMetadata == nil + } } /// A collection of source attributions for a piece of content. @@ -525,15 +531,6 @@ extension Candidate: Decodable { finishReason = try container.decodeIfPresent(FinishReason.self, forKey: .finishReason) - // The `content` may only be empty if a `finishReason` is included; if neither are included in - // the response then this is likely the `"content": {}` bug. - guard !content.parts.isEmpty || finishReason != nil else { - throw InvalidCandidateError.emptyContent(underlyingError: DecodingError.dataCorrupted(.init( - codingPath: [CodingKeys.content, CodingKeys.finishReason], - debugDescription: "Invalid Candidate: empty content and no finish reason" - ))) - } - citationMetadata = try container.decodeIfPresent( CitationMetadata.self, forKey: .citationMetadata diff --git a/FirebaseAI/Sources/GenerativeModel.swift b/FirebaseAI/Sources/GenerativeModel.swift index 8d3f5e043a7..428e1fe6f26 100644 --- a/FirebaseAI/Sources/GenerativeModel.swift +++ b/FirebaseAI/Sources/GenerativeModel.swift @@ -174,6 +174,13 @@ public final class GenerativeModel: Sendable { throw GenerateContentError.responseStoppedEarly(reason: reason, response: response) } + // If all candidates are empty (contain no information that a developer could act on) then throw + if response.candidates.allSatisfy({ $0.isEmpty }) { + throw GenerateContentError.internalError(underlying: InvalidCandidateError.emptyContent( + underlyingError: Candidate.EmptyContentError() + )) + } + return response } @@ -223,6 +230,7 @@ public final class GenerativeModel: Sendable { let responseStream = generativeAIService.loadRequestStream(request: generateContentRequest) Task { do { + var didYieldResponse = false for try await response in responseStream { // Check the prompt feedback to see if the prompt was blocked. if response.promptFeedback?.blockReason != nil { @@ -237,9 +245,30 @@ public final class GenerativeModel: Sendable { ) } - continuation.yield(response) + // Skip returning the response if all candidates are empty (i.e., they contain no + // information that a developer could act on). + if response.candidates.allSatisfy({ $0.isEmpty }) { + AILog.log( + level: .debug, + code: .generateContentResponseEmptyCandidates, + "Skipped response with all empty candidates: \(response)" + ) + } else { + continuation.yield(response) + didYieldResponse = true + } + } + + // Throw an error if all responses were skipped due to empty content. + if didYieldResponse { + continuation.finish() + } else { + continuation.finish(throwing: GenerativeModel.generateContentError( + from: InvalidCandidateError.emptyContent( + underlyingError: Candidate.EmptyContentError() + ) + )) } - continuation.finish() } catch { continuation.finish(throwing: GenerativeModel.generateContentError(from: error)) return diff --git a/FirebaseAI/Sources/ModelContent.swift b/FirebaseAI/Sources/ModelContent.swift index 1a0aa6f5f09..a0dfe6eb937 100644 --- a/FirebaseAI/Sources/ModelContent.swift +++ b/FirebaseAI/Sources/ModelContent.swift @@ -39,9 +39,17 @@ struct InternalPart: Equatable, Sendable { case fileData(FileData) case functionCall(FunctionCall) case functionResponse(FunctionResponse) + + struct UnsupportedDataError: Error { + let decodingError: DecodingError + + var localizedDescription: String { + decodingError.localizedDescription + } + } } - let data: OneOfData + let data: OneOfData? let isThought: Bool? @@ -65,7 +73,7 @@ public struct ModelContent: Equatable, Sendable { /// The data parts comprising this ``ModelContent`` value. public var parts: [any Part] { - return internalParts.map { part -> any Part in + return internalParts.compactMap { part -> (any Part)? in switch part.data { case let .text(text): return TextPart(text, isThought: part.isThought, thoughtSignature: part.thoughtSignature) @@ -85,6 +93,9 @@ public struct ModelContent: Equatable, Sendable { return FunctionResponsePart( functionResponse, isThought: part.isThought, thoughtSignature: part.thoughtSignature ) + case .none: + // Filter out parts that contain missing or unrecognized data + return nil } } } @@ -179,7 +190,14 @@ extension InternalPart: Codable { } public init(from decoder: Decoder) throws { - data = try OneOfData(from: decoder) + do { + data = try OneOfData(from: decoder) + } catch let error as OneOfData.UnsupportedDataError { + AILog.error(code: .decodedUnsupportedPartData, error.localizedDescription) + data = nil + } catch { // Re-throw any other error types + throw error + } let container = try decoder.container(keyedBy: CodingKeys.self) isThought = try container.decodeIfPresent(Bool.self, forKey: .isThought) thoughtSignature = try container.decodeIfPresent(String.self, forKey: .thoughtSignature) @@ -226,9 +244,11 @@ extension InternalPart.OneOfData: Codable { self = try .functionResponse(values.decode(FunctionResponse.self, forKey: .functionResponse)) } else { let unexpectedKeys = values.allKeys.map { $0.stringValue } - throw DecodingError.dataCorrupted(DecodingError.Context( - codingPath: values.codingPath, - debugDescription: "Unexpected Part type(s): \(unexpectedKeys)" + throw UnsupportedDataError(decodingError: DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: values.codingPath, + debugDescription: "Unexpected Part type(s): \(unexpectedKeys)" + ) )) } } diff --git a/FirebaseAI/Sources/Types/Internal/Errors/EmptyContentError.swift b/FirebaseAI/Sources/Types/Internal/Errors/EmptyContentError.swift new file mode 100644 index 00000000000..7c33a975c18 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Errors/EmptyContentError.swift @@ -0,0 +1,20 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +extension Candidate { + struct EmptyContentError: Error { + let localizedDescription = "Invalid Candidate: empty content and no finish reason" + } +} diff --git a/FirebaseAI/Tests/TestApp/Sources/Constants.swift b/FirebaseAI/Tests/TestApp/Sources/Constants.swift index ef7d9e7c061..be5c0c06891 100644 --- a/FirebaseAI/Tests/TestApp/Sources/Constants.swift +++ b/FirebaseAI/Tests/TestApp/Sources/Constants.swift @@ -24,6 +24,7 @@ public enum ModelNames { public static let gemini2Flash = "gemini-2.0-flash-001" public static let gemini2FlashLite = "gemini-2.0-flash-lite-001" public static let gemini2FlashPreviewImageGeneration = "gemini-2.0-flash-preview-image-generation" + public static let gemini2_5_FlashImagePreview = "gemini-2.5-flash-image-preview" public static let gemini2_5_Flash = "gemini-2.5-flash" public static let gemini2_5_Pro = "gemini-2.5-pro" public static let gemma3_4B = "gemma-3-4b-it" diff --git a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift index ef0f19be217..5b70223ece4 100644 --- a/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift +++ b/FirebaseAI/Tests/TestApp/Tests/Integration/GenerateContentIntegrationTests.swift @@ -322,14 +322,20 @@ struct GenerateContentIntegrationTests { } @Test(arguments: [ - InstanceConfig.vertexAI_v1beta, - InstanceConfig.vertexAI_v1beta_global, - InstanceConfig.googleAI_v1beta, + (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2FlashPreviewImageGeneration), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini2FlashPreviewImageGeneration), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini2_5_FlashImagePreview), + (InstanceConfig.googleAI_v1beta, ModelNames.gemini2FlashPreviewImageGeneration), + (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashImagePreview), // Note: The following configs are commented out for easy one-off manual testing. - // InstanceConfig.googleAI_v1beta_staging, - // InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, + // (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemini2FlashPreviewImageGeneration) + // (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2FlashPreviewImageGeneration), + // ( + // InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, + // ModelNames.gemini2FlashPreviewImageGeneration + // ), ]) - func generateImage(_ config: InstanceConfig) async throws { + func generateImage(_ config: InstanceConfig, modelName: String) async throws { let generationConfig = GenerationConfig( temperature: 0.0, topP: 0.0, @@ -342,7 +348,7 @@ struct GenerateContentIntegrationTests { $0.harmCategory != .civicIntegrity } let model = FirebaseAI.componentInstance(config).generativeModel( - modelName: ModelNames.gemini2FlashPreviewImageGeneration, + modelName: modelName, generationConfig: generationConfig, safetySettings: safetySettings ) @@ -483,6 +489,73 @@ struct GenerateContentIntegrationTests { #expect(response == expectedResponse) } + @Test(arguments: [ + (InstanceConfig.vertexAI_v1beta, ModelNames.gemini2FlashPreviewImageGeneration), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini2FlashPreviewImageGeneration), + (InstanceConfig.vertexAI_v1beta_global, ModelNames.gemini2_5_FlashImagePreview), + (InstanceConfig.googleAI_v1beta, ModelNames.gemini2FlashPreviewImageGeneration), + (InstanceConfig.googleAI_v1beta, ModelNames.gemini2_5_FlashImagePreview), + // Note: The following configs are commented out for easy one-off manual testing. + // (InstanceConfig.googleAI_v1beta_staging, ModelNames.gemini2FlashPreviewImageGeneration) + // (InstanceConfig.googleAI_v1beta_freeTier, ModelNames.gemini2FlashPreviewImageGeneration), + // ( + // InstanceConfig.googleAI_v1beta_freeTier_bypassProxy, + // ModelNames.gemini2FlashPreviewImageGeneration + // ), + ]) + func generateImageStreaming(_ config: InstanceConfig, modelName: String) async throws { + let generationConfig = GenerationConfig( + temperature: 0.0, + topP: 0.0, + topK: 1, + responseModalities: [.text, .image] + ) + let safetySettings = safetySettings.filter { + // HARM_CATEGORY_CIVIC_INTEGRITY is deprecated in Vertex AI but only rejected when using the + // 'gemini-2.0-flash-preview-image-generation' model. + $0.harmCategory != .civicIntegrity + } + let model = FirebaseAI.componentInstance(config).generativeModel( + modelName: modelName, + generationConfig: generationConfig, + safetySettings: safetySettings + ) + let prompt = "Generate an image of a cute cartoon kitten playing with a ball of yarn" + + let stream = try model.generateContentStream(prompt) + + var inlineDataParts = [InlineDataPart]() + for try await response in stream { + let candidate = try #require(response.candidates.first) + let inlineDataPart = candidate.content.parts.first { $0 is InlineDataPart } as? InlineDataPart + if let inlineDataPart { + inlineDataParts.append(inlineDataPart) + let inlineDataPartsViaAccessor = response.inlineDataParts + #expect(inlineDataPartsViaAccessor.count == 1) + #expect(inlineDataPartsViaAccessor == response.inlineDataParts) + } + let textPart = candidate.content.parts.first { $0 is TextPart } as? TextPart + #expect( + inlineDataPart != nil || textPart != nil || candidate.finishReason == .stop, + "No text or image found in the candidate" + ) + } + + #expect(inlineDataParts.count == 1) + let inlineDataPart = try #require(inlineDataParts.first) + #expect(inlineDataPart.mimeType == "image/png") + #expect(inlineDataPart.data.count > 0) + #if canImport(UIKit) + let uiImage = try #require(UIImage(data: inlineDataPart.data)) + // Gemini 2.0 Flash Experimental returns images sized to fit within a 1024x1024 pixel box but + // dimensions may vary depending on the aspect ratio. + #expect(uiImage.size.width <= 1024) + #expect(uiImage.size.width >= 500) + #expect(uiImage.size.height <= 1024) + #expect(uiImage.size.height >= 500) + #endif // canImport(UIKit) + } + // MARK: - App Check Tests @Test(arguments: InstanceConfig.appCheckNotConfiguredConfigs) diff --git a/FirebaseAI/Tests/Unit/GenerativeModelGoogleAITests.swift b/FirebaseAI/Tests/Unit/GenerativeModelGoogleAITests.swift index 00e0d398855..b1ee49da6a1 100644 --- a/FirebaseAI/Tests/Unit/GenerativeModelGoogleAITests.swift +++ b/FirebaseAI/Tests/Unit/GenerativeModelGoogleAITests.swift @@ -509,6 +509,23 @@ final class GenerativeModelGoogleAITests: XCTestCase { XCTAssertTrue(thoughtSignature.hasPrefix("CiIBVKhc7vB+vaaq6rA")) } + func testGenerateContentStream_success_ignoresEmptyParts() async throws { + MockURLProtocol.requestHandler = try GenerativeModelTestUtil.httpRequestHandler( + forResource: "streaming-success-empty-parts", + withExtension: "txt", + subdirectory: googleAISubdirectory + ) + + let stream = try model.generateContentStream("Hi") + for try await response in stream { + let candidate = try XCTUnwrap(response.candidates.first) + XCTAssertGreaterThan(candidate.content.parts.count, 0) + let text = response.text + let inlineData = response.inlineDataParts.first + XCTAssertTrue(text != nil || inlineData != nil, "Response did not contain text or data") + } + } + func testGenerateContentStream_failureInvalidAPIKey() async throws { MockURLProtocol.requestHandler = try GenerativeModelTestUtil.httpRequestHandler( forResource: "unary-failure-api-key", diff --git a/FirebaseAI/Tests/Unit/GenerativeModelVertexAITests.swift b/FirebaseAI/Tests/Unit/GenerativeModelVertexAITests.swift index 0e33ba557e6..2b7a60ec0a8 100644 --- a/FirebaseAI/Tests/Unit/GenerativeModelVertexAITests.swift +++ b/FirebaseAI/Tests/Unit/GenerativeModelVertexAITests.swift @@ -477,6 +477,27 @@ final class GenerativeModelVertexAITests: XCTestCase { XCTAssertGreaterThan(imagePart.data.count, 0) } + func testGenerateContent_success_image_emptyPartIgnored() async throws { + MockURLProtocol.requestHandler = try GenerativeModelTestUtil.httpRequestHandler( + forResource: "unary-success-empty-part", + withExtension: "json", + subdirectory: vertexSubdirectory + ) + + let response = try await model.generateContent(testPrompt) + + XCTAssertEqual(response.candidates.count, 1) + let candidate = try XCTUnwrap(response.candidates.first) + XCTAssertEqual(candidate.content.parts.count, 2) + let inlineDataParts = response.inlineDataParts + XCTAssertEqual(inlineDataParts.count, 1) + let imagePart = try XCTUnwrap(inlineDataParts.first) + XCTAssertEqual(imagePart.mimeType, "image/png") + XCTAssertGreaterThan(imagePart.data.count, 0) + let text = try XCTUnwrap(response.text) + XCTAssertTrue(text.starts(with: "I can certainly help you with that")) + } + func testGenerateContent_appCheck_validToken() async throws { let appCheckToken = "test-valid-token" model = GenerativeModel( @@ -786,12 +807,12 @@ final class GenerativeModelVertexAITests: XCTestCase { XCTFail("Should throw GenerateContentError.internalError; no error thrown.") } catch let GenerateContentError .internalError(underlying: invalidCandidateError as InvalidCandidateError) { - guard case let .emptyContent(decodingError) = invalidCandidateError else { - XCTFail("Not an InvalidCandidateError.emptyContent error: \(invalidCandidateError)") + guard case let .emptyContent(underlyingError) = invalidCandidateError else { + XCTFail("Should be an InvalidCandidateError.emptyContent error: \(invalidCandidateError)") return } - _ = try XCTUnwrap(decodingError as? DecodingError, - "Not a DecodingError: \(decodingError)") + _ = try XCTUnwrap(underlyingError as? Candidate.EmptyContentError, + "Should be an empty content error: \(underlyingError)") } catch { XCTFail("Should throw GenerateContentError.internalError; error thrown: \(error)") } @@ -976,7 +997,7 @@ final class GenerativeModelVertexAITests: XCTestCase { XCTAssertNotNil(responseError) let generateContentError = try XCTUnwrap(responseError as? GenerateContentError) guard case let .internalError(underlyingError) = generateContentError else { - XCTFail("Not an internal error: \(generateContentError)") + XCTFail("Should be an internal error: \(generateContentError)") return } XCTAssertEqual(underlyingError.localizedDescription, "Response was not an HTTP response.") @@ -1004,12 +1025,12 @@ final class GenerativeModelVertexAITests: XCTestCase { XCTAssertNotNil(responseError) let generateContentError = try XCTUnwrap(responseError as? GenerateContentError) guard case let .internalError(underlyingError) = generateContentError else { - XCTFail("Not an internal error: \(generateContentError)") + XCTFail("Should be an internal error: \(generateContentError)") return } let decodingError = try XCTUnwrap(underlyingError as? DecodingError) guard case let .dataCorrupted(context) = decodingError else { - XCTFail("Not a data corrupted error: \(decodingError)") + XCTFail("Should be a data corrupted error: \(decodingError)") return } XCTAssert(context.debugDescription.hasPrefix("Failed to decode GenerateContentResponse")) @@ -1038,17 +1059,17 @@ final class GenerativeModelVertexAITests: XCTestCase { XCTAssertNotNil(responseError) let generateContentError = try XCTUnwrap(responseError as? GenerateContentError) guard case let .internalError(underlyingError) = generateContentError else { - XCTFail("Not an internal error: \(generateContentError)") + XCTFail("Should be an internal error: \(generateContentError)") return } let invalidCandidateError = try XCTUnwrap(underlyingError as? InvalidCandidateError) guard case let .emptyContent(emptyContentUnderlyingError) = invalidCandidateError else { - XCTFail("Not an empty content error: \(invalidCandidateError)") + XCTFail("Should be an empty content error: \(invalidCandidateError)") return } _ = try XCTUnwrap( - emptyContentUnderlyingError as? DecodingError, - "Not a decoding error: \(emptyContentUnderlyingError)" + emptyContentUnderlyingError as? Candidate.EmptyContentError, + "Should be an empty content error: \(emptyContentUnderlyingError)" ) } @@ -1563,7 +1584,7 @@ final class GenerativeModelVertexAITests: XCTestCase { } } catch let GenerateContentError.internalError(underlying as DecodingError) { guard case let .dataCorrupted(context) = underlying else { - XCTFail("Not a data corrupted error: \(underlying)") + XCTFail("Should be a data corrupted error: \(underlying)") return } XCTAssert(context.debugDescription.hasPrefix("Failed to decode GenerateContentResponse")) @@ -1591,11 +1612,11 @@ final class GenerativeModelVertexAITests: XCTestCase { } } catch let GenerateContentError.internalError(underlyingError as InvalidCandidateError) { guard case let .emptyContent(contentError) = underlyingError else { - XCTFail("Not an empty content error: \(underlyingError)") + XCTFail("Should be an empty content error: \(underlyingError)") return } - XCTAssert(contentError is DecodingError) + XCTAssert(contentError is Candidate.EmptyContentError) return }