Remove edit_image and upscale_image from vision models; export Image

MarkDaoust · MarkDaoust · commit 7f6ca1d12252 · 2024-09-25T15:02:31.000-07:00
Change-Id: Ic9c270279ee020baef2c3b2117199ff17b066d88
diff --git a/google/generativeai/vision_models/__init__.py b/google/generativeai/vision_models/__init__.py
@@ -15,12 +15,14 @@
 """Classes for working with vision models."""
 
 from google.generativeai.vision_models._vision_models import (
+    Image,
     GeneratedImage,
     ImageGenerationModel,
     ImageGenerationResponse,
 )
 
 __all__ = [
+    "Image",
     "GeneratedImage",
     "ImageGenerationModel",
     "ImageGenerationResponse",
diff --git a/google/generativeai/vision_models/_vision_models.py b/google/generativeai/vision_models/_vision_models.py
@@ -227,20 +227,6 @@ def _generate_images(
         height: Optional[int] = None,
         aspect_ratio: Optional[Literal["1:1", "9:16", "16:9", "4:3", "3:4"]] = None,
         guidance_scale: Optional[float] = None,
-        base_image: Optional["Image"] = None,
-        mask: Optional["Image"] = None,
-        edit_mode: Optional[
-            Literal[
-                "inpainting-insert",
-                "inpainting-remove",
-                "outpainting",
-                "product-image",
-            ]
-        ] = None,
-        mask_mode: Optional[Literal["background", "foreground", "semantic"]] = None,
-        segmentation_classes: Optional[List[str]] = None,
-        mask_dilation: Optional[float] = None,
-        product_position: Optional[Literal["fixed", "reposition"]] = None,
         output_mime_type: Optional[Literal["image/png", "image/jpeg"]] = None,
         compression_quality: Optional[float] = None,
         language: Optional[str] = None,
@@ -267,30 +253,6 @@ def _generate_images(
             guidance_scale: Controls the strength of the prompt. Suggested values
               are - * 0-9 (low strength) * 10-20 (medium strength) * 21+ (high
               strength)
-            base_image: Base image to use for the image generation.
-            mask: Mask for the base image.
-            edit_mode: Describes the editing mode for the request. Supported values
-              are - * inpainting-insert: fills the mask area based on the text
-              prompt (requires mask and text) * inpainting-remove: removes the
-              object(s) in the mask area. (requires mask)
-                * outpainting: extend the image based on the mask area. (Requires
-                  mask) * product-image: Changes the background for the predominant
-                  product or subject in the image
-            mask_mode: Solicits generation of the mask (v/s providing mask as an
-              input). Supported values are:
-                * background: Automatically generates a mask for all regions except
-                  the primary subject(s) of the image
-                * foreground: Automatically generates a mask for the primary
-                  subjects(s) of the image.
-                * semantic: Segment one or more of the segmentation classes using
-                  class ID
-            segmentation_classes: List of class IDs for segmentation. Max of 5 IDs
-            mask_dilation: Defines the dilation percentage of the mask provided.
-              Float between 0 and 1. Defaults to 0.03
-            product_position: Defines whether the product should stay fixed or be
-              repositioned. Supported Values:
-                * fixed: Fixed position
-                * reposition: Can be moved (default)
             output_mime_type: Which image format should the output be saved as.
               Supported values: * image/png: Save as a PNG image * image/jpeg: Save
               as a JPEG image
@@ -325,24 +287,6 @@ class ID
             "number_of_images_in_batch": number_of_images,
         }
 
-        if base_image:
-            instance["image"] = {
-                "bytesBase64Encoded": base_image._as_base64_string()  # pylint: disable=protected-access
-            }
-            shared_generation_parameters["base_image_hash"] = hashlib.sha1(
-                base_image._image_bytes  # pylint: disable=protected-access
-            ).hexdigest()
-
-        if mask:
-            instance["mask"] = {
-                "image": {
-                    "bytesBase64Encoded": mask._as_base64_string()  # pylint: disable=protected-access
-                },
-            }
-            shared_generation_parameters["mask_hash"] = hashlib.sha1(
-                mask._image_bytes  # pylint: disable=protected-access
-            ).hexdigest()
-
         parameters = {}
         max_size = max(width or 0, height or 0) or None
         if aspect_ratio is not None:
@@ -366,29 +310,6 @@ class ID
             parameters["language"] = language
             shared_generation_parameters["language"] = language
 
-        parameters["editConfig"] = {}
-        if edit_mode is not None:
-            parameters["editConfig"]["editMode"] = edit_mode
-            shared_generation_parameters["edit_mode"] = edit_mode
-
-        if mask is None and edit_mode != "product-image":
-            parameters["editConfig"]["maskMode"] = {}
-            if mask_mode is not None:
-                parameters["editConfig"]["maskMode"]["maskType"] = mask_mode
-                shared_generation_parameters["mask_mode"] = mask_mode
-
-            if segmentation_classes is not None:
-                parameters["editConfig"]["maskMode"]["classes"] = segmentation_classes
-                shared_generation_parameters["classes"] = segmentation_classes
-
-        if mask_dilation is not None:
-            parameters["editConfig"]["maskDilation"] = mask_dilation
-            shared_generation_parameters["mask_dilation"] = mask_dilation
-
-        if product_position is not None:
-            parameters["editConfig"]["productPosition"] = product_position
-            shared_generation_parameters["product_position"] = product_position
-
         parameters["outputOptions"] = {}
         if output_mime_type is not None:
             parameters["outputOptions"]["mimeType"] = output_mime_type
@@ -489,243 +410,6 @@ def generate_images(
             person_generation=person_generation,
         )
 
-    def edit_image(
-        self,
-        *,
-        prompt: str,
-        base_image: "Image",
-        mask: Optional["Image"] = None,
-        negative_prompt: Optional[str] = None,
-        number_of_images: int = 1,
-        guidance_scale: Optional[float] = None,
-        edit_mode: Optional[
-            Literal["inpainting-insert", "inpainting-remove", "outpainting", "product-image"]
-        ] = None,
-        mask_mode: Optional[Literal["background", "foreground", "semantic"]] = None,
-        segmentation_classes: Optional[List[str]] = None,
-        mask_dilation: Optional[float] = None,
-        product_position: Optional[Literal["fixed", "reposition"]] = None,
-        output_mime_type: Optional[Literal["image/png", "image/jpeg"]] = None,
-        compression_quality: Optional[float] = None,
-        language: Optional[str] = None,
-        safety_filter_level: Optional[
-            Literal["block_most", "block_some", "block_few", "block_fewest"]
-        ] = None,
-        person_generation: Optional[Literal["dont_allow", "allow_adult", "allow_all"]] = None,
-    ) -> "ImageGenerationResponse":
-        """Edits an existing image based on text prompt.
-
-        Args:
-            prompt: Text prompt for the image.
-            base_image: Base image from which to generate the new image.
-            mask: Mask for the base image.
-            negative_prompt: A description of what you want to omit in
-                the generated images.
-            number_of_images: Number of images to generate. Range: 1..8.
-            guidance_scale: Controls the strength of the prompt.
-                Suggested values are:
-                * 0-9 (low strength)
-                * 10-20 (medium strength)
-                * 21+ (high strength)
-            edit_mode: Describes the editing mode for the request. Supported values are:
-                * inpainting-insert: fills the mask area based on the text prompt
-                (requires mask and text)
-                * inpainting-remove: removes the object(s) in the mask area.
-                (requires mask)
-                * outpainting: extend the image based on the mask area.
-                (Requires mask)
-                * product-image: Changes the background for the predominant product
-                or subject in the image
-            mask_mode: Solicits generation of the mask (v/s providing mask as an
-                input). Supported values are:
-                * background: Automatically generates a mask for all regions except
-                the primary subject(s) of the image
-                * foreground: Automatically generates a mask for the primary
-                subjects(s) of the image.
-                * semantic: Segment one or more of the segmentation classes using
-                class ID
-            segmentation_classes: List of class IDs for segmentation. Max of 5 IDs
-            mask_dilation: Defines the dilation percentage of the mask provided.
-                Float between 0 and 1. Defaults to 0.03
-            product_position: Defines whether the product should stay fixed or be
-                repositioned. Supported Values:
-                * fixed: Fixed position
-                * reposition: Can be moved (default)
-            output_mime_type: Which image format should the output be saved as.
-                Supported values:
-                * image/png: Save as a PNG image
-                * image/jpeg: Save as a JPEG image
-            compression_quality: Level of compression if the output mime type is
-              selected to be image/jpeg. Float between 0 to 100
-            language: Language of the text prompt for the image. Default: None.
-                Supported values are `"en"` for English, `"hi"` for Hindi,
-                `"ja"` for Japanese, `"ko"` for Korean, and `"auto"` for
-                automatic language detection.
-            safety_filter_level: Adds a filter level to Safety filtering. Supported
-                values are:
-                * "block_most" : Strongest filtering level, most strict
-                blocking
-                * "block_some" : Block some problematic prompts and responses
-                * "block_few" : Block fewer problematic prompts and responses
-                * "block_fewest" : Block very few problematic prompts and responses
-            person_generation: Allow generation of people by the model Supported
-                values are:
-                * "dont_allow" : Block generation of people
-                * "allow_adult" : Generate adults, but not children
-                * "allow_all" : Generate adults and children
-
-        Returns:
-            An `ImageGenerationResponse` object.
-        """
-        return self._generate_images(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            number_of_images=number_of_images,
-            guidance_scale=guidance_scale,
-            base_image=base_image,
-            mask=mask,
-            edit_mode=edit_mode,
-            mask_mode=mask_mode,
-            segmentation_classes=segmentation_classes,
-            mask_dilation=mask_dilation,
-            product_position=product_position,
-            output_mime_type=output_mime_type,
-            compression_quality=compression_quality,
-            language=language,
-            safety_filter_level=safety_filter_level,
-            person_generation=person_generation,
-        )
-
-    def upscale_image(
-        self,
-        image: Union["Image", "GeneratedImage"],
-        new_size: Optional[int] = None,
-        upscale_factor: Optional[Literal["x2", "x4"]] = None,
-        output_mime_type: Optional[Literal["image/png", "image/jpeg"]] = "image/png",
-        output_compression_quality: Optional[int] = None,
-    ) -> "Image":
-        """Upscales an image.
-
-        This supports upscaling images generated through the `generate_images()`
-        method, or upscaling a new image.
-
-        Examples::
-
-            # Upscale a generated image
-            model = ImageGenerationModel.from_pretrained("imagegeneration@002")
-            response = model.generate_images(
-                prompt="Astronaut riding a horse",
-            )
-            model.upscale_image(image=response[0])
-
-            # Upscale a new 1024x1024 image
-            my_image = Image.load_from_file("my-image.png")
-            model.upscale_image(image=my_image)
-
-            # Upscale a new arbitrary sized image using a x2 or x4 upscaling factor
-            my_image = Image.load_from_file("my-image.png")
-            model.upscale_image(image=my_image, upscale_factor="x2")
-
-            # Upscale an image and get the result in JPEG format
-            my_image = Image.load_from_file("my-image.png")
-            model.upscale_image(image=my_image, output_mime_type="image/jpeg",
-            output_compression_quality=90)
-
-        Args:
-            image (Union[GeneratedImage, Image]): Required. The generated image
-                to upscale.
-            new_size (int): The size of the biggest dimension of the upscaled
-                image.
-                Only 2048 and 4096 are currently supported. Results in a
-                2048x2048 or 4096x4096 image. Defaults to 2048 if not provided.
-            upscale_factor: The upscaling factor. Supported values are "x2" and
-                "x4". Defaults to None.
-            output_mime_type: The mime type of the output image. Supported values
-                are "image/png" and "image/jpeg". Defaults to "image/png".
-            output_compression_quality: The compression quality of the output
-                image
-                as an int (0-100). Only applicable if the output mime type is
-                "image/jpeg". Defaults to None.
-
-        Returns:
-            An `Image` object.
-        """
-        if self._client is None:
-            self._client = client.get_default_prediction_client()
-
-        target_image_size = new_size if new_size else None
-        longest_dim = max(image._size[0], image._size[1])
-
-        if not new_size and not upscale_factor:
-            raise ValueError("Either new_size or upscale_factor must be provided.")
-
-        if not upscale_factor:
-            x2_factor = 2.0
-            x4_factor = 4.0
-            epsilon = 0.1
-            is_upscaling_x2_request = abs(new_size / longest_dim - x2_factor) < epsilon
-            is_upscaling_x4_request = abs(new_size / longest_dim - x4_factor) < epsilon
-
-            if not is_upscaling_x2_request and not is_upscaling_x4_request:
-                raise ValueError(
-                    "Only x2 and x4 upscaling are currently supported. Requested"
-                    f" upscaling factor: {new_size / longest_dim}"
-                )
-        else:
-            if upscale_factor == "x2":
-                target_image_size = longest_dim * 2
-            else:
-                target_image_size = longest_dim * 4
-        if new_size not in _SUPPORTED_UPSCALING_SIZES:
-            raise ValueError(
-                "Only the folowing square upscaling sizes are currently supported:"
-                f" {_SUPPORTED_UPSCALING_SIZES}."
-            )
-
-        instance = {"prompt": ""}
-
-        instance["image"] = {
-            "bytesBase64Encoded": image._as_base64_string()  # pylint: disable=protected-access
-        }
-
-        parameters = {
-            "sampleCount": 1,
-            "mode": "upscale",
-        }
-
-        if upscale_factor:
-            parameters["upscaleConfig"] = {"upscaleFactor": upscale_factor}
-
-        else:
-            parameters["sampleImageSize"] = str(new_size)
-
-        parameters["outputOptions"] = {"mimeType": output_mime_type}
-        if output_mime_type == "image/jpeg" and output_compression_quality is not None:
-            parameters["outputOptions"]["compressionQuality"] = output_compression_quality
-
-
-        pr = protos.PredictRequest.pb()
-        request = pr(
-            model=self.model_name, instances=[to_value(instance)], parameters=to_value(parameters)
-        )
-        response = self._client.predict(request)
-
-        upscaled_image = response.predictions[0]
-
-        if isinstance(image, GeneratedImage):
-            generation_parameters = image.generation_parameters
-
-        else:
-            generation_parameters = {}
-
-        generation_parameters["upscaled_image_size"] = target_image_size
-
-        encoded_bytes = upscaled_image.get("bytesBase64Encoded")
-        return GeneratedImage(
-            image_bytes=base64.b64decode(encoded_bytes) if encoded_bytes else None,
-            generation_parameters=generation_parameters,
-        )
-
 
 @dataclasses.dataclass
 class ImageGenerationResponse: