diff --git a/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationInstance.swift b/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationInstance.swift index c1d853643cc..5b741fe32d2 100644 --- a/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationInstance.swift +++ b/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationInstance.swift @@ -15,12 +15,48 @@ @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct ImageGenerationInstance { let prompt: String + let referenceImages: [ImagenReferenceImage]? + + init(prompt: String, referenceImages: [ImagenReferenceImage]? = nil) { + self.prompt = prompt + self.referenceImages = referenceImages + } } @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -extension ImageGenerationInstance: Equatable {} +extension ImageGenerationInstance: Equatable { + static func == (lhs: ImageGenerationInstance, rhs: ImageGenerationInstance) -> Bool { + return lhs.prompt == rhs.prompt && lhs.referenceImages?.count == rhs.referenceImages?.count + } +} // MARK: - Codable Conformance @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -extension ImageGenerationInstance: Encodable {} +extension ImageGenerationInstance: Encodable { + enum CodingKeys: String, CodingKey { + case prompt + case referenceImages = "image" + } + + func encode(to encoder: any Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(prompt, forKey: .prompt) + if let referenceImages = referenceImages { + var imagesContainer = container.nestedUnkeyedContainer(forKey: .referenceImages) + for image in referenceImages { + switch image { + case let rawImage as ImagenRawImage: + try imagesContainer.encode(rawImage) + case let mask as ImagenMaskReference: + try imagesContainer.encode(mask) + default: + throw EncodingError.invalidValue(image, EncodingError.Context( + codingPath: imagesContainer.codingPath, + debugDescription: "Unknown ImagenReferenceImage type." + )) + } + } + } + } +} diff --git a/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift b/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift index 4189e5fbac7..d68aacc85d4 100644 --- a/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift +++ b/FirebaseAI/Sources/Types/Internal/Imagen/ImageGenerationParameters.swift @@ -23,6 +23,32 @@ struct ImageGenerationParameters { let outputOptions: ImageGenerationOutputOptions? let addWatermark: Bool? let includeResponsibleAIFilterReason: Bool? + let editMode: String? + let editConfig: ImageEditingParameters? + + init(sampleCount: Int?, + storageURI: String?, + negativePrompt: String?, + aspectRatio: String?, + safetyFilterLevel: String?, + personGeneration: String?, + outputOptions: ImageGenerationOutputOptions?, + addWatermark: Bool?, + includeResponsibleAIFilterReason: Bool?, + editMode: String? = nil, + editConfig: ImageEditingParameters? 
= nil) { + self.sampleCount = sampleCount + self.storageURI = storageURI + self.negativePrompt = negativePrompt + self.aspectRatio = aspectRatio + self.safetyFilterLevel = safetyFilterLevel + self.personGeneration = personGeneration + self.outputOptions = outputOptions + self.addWatermark = addWatermark + self.includeResponsibleAIFilterReason = includeResponsibleAIFilterReason + self.editMode = editMode + self.editConfig = editConfig + } } @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @@ -42,6 +68,8 @@ extension ImageGenerationParameters: Encodable { case outputOptions case addWatermark case includeResponsibleAIFilterReason = "includeRaiReason" + case editMode + case editConfig } func encode(to encoder: any Encoder) throws { @@ -58,5 +86,12 @@ extension ImageGenerationParameters: Encodable { includeResponsibleAIFilterReason, forKey: .includeResponsibleAIFilterReason ) + try container.encodeIfPresent(editMode, forKey: .editMode) + try container.encodeIfPresent(editConfig, forKey: .editConfig) } } + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +struct ImageEditingParameters: Codable, Equatable { + let editSteps: Int? +} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/Dimensions.swift b/FirebaseAI/Sources/Types/Public/Imagen/Dimensions.swift new file mode 100644 index 00000000000..5ed70cfb320 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/Dimensions.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Represents the dimensions of an image. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct Dimensions: Codable, Sendable { + /// The width of the image in pixels. + public let width: Int + + /// The height of the image in pixels. + public let height: Int + + public init(width: Int, height: Int) { + self.width = width + self.height = height + } +} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenEditMode.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenEditMode.swift new file mode 100644 index 00000000000..b1f44f7ec3b --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenEditMode.swift @@ -0,0 +1,31 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// The editing method to use. 
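+/// Each case's raw value is the string identifier for the editing mode that is sent to the
+/// Imagen backend in the request's `editMode` parameter.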
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public enum ImagenEditMode: String, Codable, Sendable { + /// The model should use the prompt and reference images to generate a new image. + case product = "product-image" + + /// The model should generate a new background for the given image. + case background = "background-refresh" + + /// The model should replace the masked region of the image with new content. + case inpaint = "inpainting" + + /// The model should extend the image beyond its original borders. + case outpaint = "outpainting" +} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenEditingConfig.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenEditingConfig.swift new file mode 100644 index 00000000000..28ca15cbbb7 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenEditingConfig.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configuration for editing an image with Imagen. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct ImagenEditingConfig: Codable, Sendable { + /// The editing method to use. + public let editMode: ImagenEditMode + + /// The number of steps to use for the editing process. + public let editSteps: Int? + + public init(editMode: ImagenEditMode, editSteps: Int? = nil) { + self.editMode = editMode + self.editSteps = editSteps + } +} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenImagePlacement.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenImagePlacement.swift new file mode 100644 index 00000000000..6e86cb3a2b4 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenImagePlacement.swift @@ -0,0 +1,49 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Represents the placement of an image within a larger canvas. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public enum ImagenImagePlacement: Sendable { + /// The image is placed at the top left corner of the canvas. + case topLeft + + /// The image is placed at the top center of the canvas. + case topCenter + + /// The image is placed at the top right corner of the canvas. + case topRight + + /// The image is placed at the middle left of the canvas. + case middleLeft + + /// The image is placed in the center of the canvas. + case center + + /// The image is placed at the middle right of the canvas. 
+ case middleRight + + /// The image is placed at the bottom left corner of the canvas. + case bottomLeft + + /// The image is placed at the bottom center of the canvas. + case bottomCenter + + /// The image is placed at the bottom right corner of the canvas. + case bottomRight + + /// The image is placed at a custom offset from the top left corner of the canvas. + case custom(x: Int, y: Int) +} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenInlineImage.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenInlineImage.swift index 4f9a1d9d74f..d477ffa0459 100644 --- a/FirebaseAI/Sources/Types/Public/Imagen/ImagenInlineImage.swift +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenInlineImage.swift @@ -26,7 +26,7 @@ public struct ImagenInlineImage: Sendable { /// The image data in PNG or JPEG format. public let data: Data - init(mimeType: String, data: Data) { + public init(mimeType: String, data: Data) { self.mimeType = mimeType self.data = data } diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenMaskReference.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenMaskReference.swift new file mode 100644 index 00000000000..f901d465f46 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenMaskReference.swift @@ -0,0 +1,169 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import CoreGraphics +import Foundation +import ImageIO + +/// A reference to a mask for inpainting. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct ImagenMaskReference: ImagenReferenceImage, Encodable { + /// The mask data. + public let data: Data + + public init(data: Data) { + self.data = data + } + + enum CodingKeys: String, CodingKey { + case data = "bytesBase64Encoded" + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(data.base64EncodedString(), forKey: .data) + } + + /// Errors that can occur during outpainting. + public enum OutpaintingError: Error { + /// The provided image data could not be decoded. + case invalidImageData + /// The new dimensions are smaller than the original image. + case dimensionsTooSmall + /// The image context could not be created. + case contextCreationFailed + /// The image could not be created from the context. + case imageCreationFailed + /// The image data could not be created from the image. 
+ case dataCreationFailed + } + + static func generateMaskAndPadForOutpainting(image: ImagenInlineImage, + newDimensions: Dimensions, + newPosition: ImagenImagePlacement) throws + -> [ImagenReferenceImage] { + guard let cgImage = CGImage.fromData(image.data) else { + throw OutpaintingError.invalidImageData + } + + let originalWidth = cgImage.width + let originalHeight = cgImage.height + + guard newDimensions.width >= originalWidth, newDimensions.height >= originalHeight else { + throw OutpaintingError.dimensionsTooSmall + } + + let offsetX: Int + let offsetY: Int + + switch newPosition { + case .topLeft: + offsetX = 0 + offsetY = 0 + case .topCenter: + offsetX = (newDimensions.width - originalWidth) / 2 + offsetY = 0 + case .topRight: + offsetX = newDimensions.width - originalWidth + offsetY = 0 + case .middleLeft: + offsetX = 0 + offsetY = (newDimensions.height - originalHeight) / 2 + case .center: + offsetX = (newDimensions.width - originalWidth) / 2 + offsetY = (newDimensions.height - originalHeight) / 2 + case .middleRight: + offsetX = newDimensions.width - originalWidth + offsetY = (newDimensions.height - originalHeight) / 2 + case .bottomLeft: + offsetX = 0 + offsetY = newDimensions.height - originalHeight + case .bottomCenter: + offsetX = (newDimensions.width - originalWidth) / 2 + offsetY = newDimensions.height - originalHeight + case .bottomRight: + offsetX = newDimensions.width - originalWidth + offsetY = newDimensions.height - originalHeight + case let .custom(x, y): + offsetX = x + offsetY = y + } + + let colorSpace = CGColorSpaceCreateDeviceRGB() + let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue + + // Create padded image + guard let paddedContext = CGContext( + data: nil, + width: newDimensions.width, + height: newDimensions.height, + bitsPerComponent: 8, + bytesPerRow: 0, + space: colorSpace, + bitmapInfo: bitmapInfo + ) else { + throw OutpaintingError.contextCreationFailed + } + paddedContext.draw( + cgImage, + in: CGRect(x: offsetX, y: offsetY, width: originalWidth, height: originalHeight) + ) + guard let paddedCGImage = paddedContext.makeImage(), + let paddedImageData = paddedCGImage.toData() else { + throw OutpaintingError.imageCreationFailed + } + + // Create mask + guard let maskContext = CGContext( + data: nil, + width: newDimensions.width, + height: newDimensions.height, + bitsPerComponent: 8, + bytesPerRow: 0, + space: CGColorSpaceCreateDeviceGray(), + bitmapInfo: CGImageAlphaInfo.none.rawValue + ) else { + throw OutpaintingError.contextCreationFailed + } + maskContext.setFillColor(gray: 1.0, alpha: 1.0) + maskContext.fill(CGRect(x: 0, y: 0, width: newDimensions.width, height: newDimensions.height)) + maskContext.setFillColor(gray: 0.0, alpha: 1.0) + maskContext.fill(CGRect(x: offsetX, y: offsetY, width: originalWidth, height: originalHeight)) + guard let maskCGImage = maskContext.makeImage(), let maskData = maskCGImage.toData() else { + throw OutpaintingError.dataCreationFailed + } + + return [ImagenRawImage(data: paddedImageData), ImagenMaskReference(data: maskData)] + } +} + +extension CGImage { + static func fromData(_ data: Data) -> CGImage? { + guard let source = CGImageSourceCreateWithData(data as CFData, nil) else { return nil } + return CGImageSourceCreateImageAtIndex(source, 0, nil) + } + + func toData() -> Data? 
{
+    guard let mutableData = CFDataCreateMutable(nil, 0),
+          let destination = CGImageDestinationCreateWithData(
+            mutableData,
+            "public.png" as CFString,
+            1,
+            nil
+          ) else { return nil }
+    CGImageDestinationAddImage(destination, self, nil)
+    guard CGImageDestinationFinalize(destination) else { return nil }
+    return mutableData as Data
+  }
+}
diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift
index e6f96df511a..8f9105a6645 100644
--- a/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift
+++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenModel.swift
@@ -86,7 +86,8 @@ public final class ImagenModel {
       parameters: ImagenModel.imageGenerationParameters(
         storageURI: nil,
         generationConfig: generationConfig,
-        safetySettings: safetySettings
+        safetySettings: safetySettings,
+        editingConfig: nil
       )
     )
   }
@@ -122,11 +123,108 @@ public final class ImagenModel {
       parameters: ImagenModel.imageGenerationParameters(
         storageURI: gcsURI,
         generationConfig: generationConfig,
-        safetySettings: safetySettings
+        safetySettings: safetySettings,
+        editingConfig: nil
       )
     )
   }
 
+  /// **[Public Preview]** Generates an image from a single base image or a set of base images.
+  ///
+  /// - Parameters:
+  ///   - referenceImages: The image inputs given to the model as a prompt.
+  ///   - prompt: The text input given to the model as a prompt.
+  ///   - config: The editing configuration settings.
+  public func editImage(referenceImages: [ImagenReferenceImage],
+                        prompt: String,
+                        config: ImagenEditingConfig? = nil) async throws
+    -> ImagenGenerationResponse<ImagenInlineImage> {
+    let request = ImagenGenerationRequest<ImagenInlineImage>(
+      model: modelResourceName,
+      apiConfig: apiConfig,
+      options: requestOptions,
+      instances: [ImageGenerationInstance(prompt: prompt, referenceImages: referenceImages)],
+      parameters: ImagenModel.imageGenerationParameters(
+        storageURI: nil,
+        generationConfig: generationConfig,
+        safetySettings: safetySettings,
+        editingConfig: config
+      )
+    )
+    return try await generativeAIService.loadRequest(request: request)
+  }
+
+  /// **[Public Preview]** Generates an image by inpainting a masked-off part of a base image.
+  ///
+  /// Inpainting is the process of filling in missing or masked-off parts of the image using
+  /// context from the original image and prompt.
+  ///
+  /// - Parameters:
+  ///   - image: The base image.
+  ///   - prompt: The text input given to the model as a prompt.
+  ///   - mask: The mask that defines which regions of the image Imagen is allowed to paint.
+  ///   - editSteps: The number of steps to use for the editing process.
+  public func inpaintImage(image: ImagenInlineImage,
+                           prompt: String,
+                           mask: ImagenMaskReference,
+                           editSteps: Int? = nil) async throws
+    -> ImagenGenerationResponse<ImagenInlineImage> {
+    do {
+      return try await editImage(
+        referenceImages: [ImagenRawImage(data: image.data), mask],
+        prompt: prompt,
+        config: ImagenEditingConfig(editMode: .inpaint, editSteps: editSteps)
+      )
+    } catch let backendError as BackendError {
+      if backendError.message.contains("not supported for this model") {
+        throw APINotAvailableOnModelError(message: backendError.message)
+      } else {
+        throw backendError
+      }
+    }
+  }
+
+  /// **[Public Preview]** Generates an image by outpainting the given image.
+  ///
+  /// Outpainting extends the image content beyond the original borders using context from the
+  /// original image, and optionally, the prompt.
+  ///
+  /// - Parameters:
+  ///   - image: The base image.
+  ///   - newDimensions: The new dimensions for the image; each dimension must be at least as
+  ///     large as the original image.
+  ///   - newPosition: The placement of the base image within the new image. This can either be
+  ///     coordinates (0,0 is the top left corner) or an alignment (for example,
+  ///     ``ImagenImagePlacement/bottomCenter``).
+  ///   - prompt: Optional; can be used to describe the content generated in the extended
+  ///     regions if context from the original image is insufficient.
+  ///   - editSteps: The number of steps to use for the editing process.
+  public func outpaintImage(image: ImagenInlineImage,
+                            newDimensions: Dimensions,
+                            newPosition: ImagenImagePlacement = .center,
+                            prompt: String = "",
+                            editSteps: Int? = nil) async throws
+    -> ImagenGenerationResponse<ImagenInlineImage> {
+    do {
+      let referenceImages = try ImagenMaskReference.generateMaskAndPadForOutpainting(
+        image: image,
+        newDimensions: newDimensions,
+        newPosition: newPosition
+      )
+      return try await editImage(
+        referenceImages: referenceImages,
+        prompt: prompt,
+        config: ImagenEditingConfig(editMode: .outpaint, editSteps: editSteps)
+      )
+    } catch let backendError as BackendError {
+      if backendError.message.contains("not supported for this model") {
+        throw APINotAvailableOnModelError(message: backendError.message)
+      } else {
+        throw backendError
+      }
+    }
+  }
+
   func generateImages<T>(prompt: String,
                          parameters: ImageGenerationParameters) async throws
     -> ImagenGenerationResponse<T> where T: Decodable, T: ImagenImageRepresentable {
@@ -134,7 +232,7 @@ public final class ImagenModel {
       model: modelResourceName,
       apiConfig: apiConfig,
       options: requestOptions,
-      instances: [ImageGenerationInstance(prompt: prompt)],
+      instances: [ImageGenerationInstance(prompt: prompt, referenceImages: nil)],
       parameters: parameters
     )
 
@@ -143,7 +241,8 @@ public final class ImagenModel {
 
   static func imageGenerationParameters(storageURI: String?,
                                         generationConfig: ImagenGenerationConfig?,
-                                        safetySettings: ImagenSafetySettings?)
+                                        safetySettings: ImagenSafetySettings?,
+                                        editingConfig: ImagenEditingConfig? = nil)
     -> ImageGenerationParameters {
     return ImageGenerationParameters(
       sampleCount: generationConfig?.numberOfImages ?? 1,
@@ -159,7 +258,11 @@ public final class ImagenModel {
         )
       },
       addWatermark: generationConfig?.addWatermark,
-      includeResponsibleAIFilterReason: true
+      includeResponsibleAIFilterReason: true,
+      editMode: editingConfig?.editMode.rawValue,
+      editConfig: editingConfig.map {
+        ImageEditingParameters(editSteps: $0.editSteps)
+      }
     )
   }
 }
diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenRawImage.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenRawImage.swift
new file mode 100644
index 00000000000..0365ebdaebc
--- /dev/null
+++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenRawImage.swift
@@ -0,0 +1,35 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// A reference image based on raw image data.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+public struct ImagenRawImage: ImagenReferenceImage, Encodable {
+  /// The raw image data.
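+  /// When the reference image is sent to the backend, this data is base64-encoded under the
+  /// `bytesBase64Encoded` key.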
+ public let data: Data + + public init(data: Data) { + self.data = data + } + + enum CodingKeys: String, CodingKey { + case data = "bytesBase64Encoded" + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(data.base64EncodedString(), forKey: .data) + } +} diff --git a/FirebaseAI/Sources/Types/Public/Imagen/ImagenReferenceImage.swift b/FirebaseAI/Sources/Types/Public/Imagen/ImagenReferenceImage.swift new file mode 100644 index 00000000000..45b28c023b3 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Imagen/ImagenReferenceImage.swift @@ -0,0 +1,19 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// A type that represents a reference image for Imagen editing. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public protocol ImagenReferenceImage: Sendable {} diff --git a/FirebaseAI/Tests/Unit/ImagenMaskReferenceTests.swift b/FirebaseAI/Tests/Unit/ImagenMaskReferenceTests.swift new file mode 100644 index 00000000000..a0d80756bf9 --- /dev/null +++ b/FirebaseAI/Tests/Unit/ImagenMaskReferenceTests.swift @@ -0,0 +1,105 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import XCTest + +@testable import FirebaseAI + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +final class ImagenMaskReferenceTests: XCTestCase { + func testGenerateMaskAndPadForOutpainting() throws { + // Setup + let originalWidth = 100 + let originalHeight = 100 + let newWidth = 200 + let newHeight = 200 + let newDimensions = Dimensions(width: newWidth, height: newHeight) + let image = ImagenInlineImage( + mimeType: "image/png", + data: createDummyImageData(width: originalWidth, height: originalHeight) + ) + + // Act + let referenceImages = try ImagenMaskReference.generateMaskAndPadForOutpainting( + image: image, + newDimensions: newDimensions, + newPosition: .center + ) + + // Assert + XCTAssertEqual(referenceImages.count, 2) + + let paddedImage = try XCTUnwrap(referenceImages[0] as? ImagenRawImage) + let mask = try XCTUnwrap(referenceImages[1] as? 
ImagenMaskReference) + + let paddedCGImage = try XCTUnwrap(CGImage.fromData(paddedImage.data)) + XCTAssertEqual(paddedCGImage.width, newWidth) + XCTAssertEqual(paddedCGImage.height, newHeight) + + let maskCGImage = try XCTUnwrap(CGImage.fromData(mask.data)) + XCTAssertEqual(maskCGImage.width, newWidth) + XCTAssertEqual(maskCGImage.height, newHeight) + } + + func testGenerateMaskAndPadForOutpainting_invalidData() { + // Setup + let newDimensions = Dimensions(width: 200, height: 200) + let image = ImagenInlineImage(mimeType: "image/png", data: Data()) + + // Act & Assert + XCTAssertThrowsError(try ImagenMaskReference.generateMaskAndPadForOutpainting( + image: image, + newDimensions: newDimensions, + newPosition: .center + )) { error in + XCTAssertEqual(error as? ImagenMaskReference.OutpaintingError, .invalidImageData) + } + } + + func testGenerateMaskAndPadForOutpainting_dimensionsTooSmall() { + // Setup + let newDimensions = Dimensions(width: 50, height: 50) + let image = ImagenInlineImage( + mimeType: "image/png", + data: createDummyImageData(width: 100, height: 100) + ) + + // Act & Assert + XCTAssertThrowsError(try ImagenMaskReference.generateMaskAndPadForOutpainting( + image: image, + newDimensions: newDimensions, + newPosition: .center + )) { error in + XCTAssertEqual(error as? ImagenMaskReference.OutpaintingError, .dimensionsTooSmall) + } + } + + private func createDummyImageData(width: Int, height: Int) -> Data { + let colorSpace = CGColorSpaceCreateDeviceRGB() + let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue + let context = CGContext( + data: nil, + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: 0, + space: colorSpace, + bitmapInfo: bitmapInfo + )! + context.setFillColor(red: 1, green: 1, blue: 1, alpha: 1) + context.fill(CGRect(x: 0, y: 0, width: width, height: height)) + let cgImage = context.makeImage()! + return cgImage.toData()! + } +}
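
Usage sketch (illustrative, not part of the patch): the snippet below shows how an app might call the editing APIs added to ImagenModel.swift. The model name, the input image and mask data, and the use of FirebaseAI.firebaseAI(backend:).imagenModel(modelName:) from the existing SDK surface are assumptions for demonstration only; it also assumes FirebaseApp.configure() has already been called and that the chosen model supports image editing.

import Foundation
import FirebaseAI

// A minimal sketch, assuming a configured Firebase app and an editing-capable Imagen model;
// "imagen-3.0-capability-001" is a placeholder model name used only for illustration.
func editWithImagen(baseImageData: Data, maskData: Data) async throws {
  let model = FirebaseAI.firebaseAI(backend: .vertexAI())
    .imagenModel(modelName: "imagen-3.0-capability-001")

  let baseImage = ImagenInlineImage(mimeType: "image/png", data: baseImageData)

  // Inpainting: repaint only the regions that the mask allows Imagen to paint.
  let inpainted = try await model.inpaintImage(
    image: baseImage,
    prompt: "A wooden bench in a park",
    mask: ImagenMaskReference(data: maskData),
    editSteps: 50
  )

  // Outpainting: extend the image onto a larger canvas, anchoring the original at the
  // bottom center. The new dimensions must be at least as large as the original image.
  let outpainted = try await model.outpaintImage(
    image: baseImage,
    newDimensions: Dimensions(width: 2048, height: 1536),
    newPosition: .bottomCenter,
    prompt: "A sandy beach at sunset"
  )

  // Lower-level entry point: pass reference images and an editing configuration directly.
  let edited = try await model.editImage(
    referenceImages: [ImagenRawImage(data: baseImageData), ImagenMaskReference(data: maskData)],
    prompt: "A field of sunflowers",
    config: ImagenEditingConfig(editMode: .inpaint, editSteps: 35)
  )

  // Responses are bound here only to silence unused-variable warnings in this sketch.
  _ = (inpainted, outpainted, edited)
}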