Skip to content

Commit 7f6ca1d

Browse files
committed
remove edit and upscale
Change-Id: Ic9c270279ee020baef2c3b2117199ff17b066d88
1 parent 50ed7db commit 7f6ca1d

File tree

2 files changed

+2
-316
lines changed

2 files changed

+2
-316
lines changed

google/generativeai/vision_models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
"""Classes for working with vision models."""
1616

1717
from google.generativeai.vision_models._vision_models import (
18+
Image,
1819
GeneratedImage,
1920
ImageGenerationModel,
2021
ImageGenerationResponse,
2122
)
2223

2324
__all__ = [
25+
"Image",
2426
"GeneratedImage",
2527
"ImageGenerationModel",
2628
"ImageGenerationResponse",

google/generativeai/vision_models/_vision_models.py

Lines changed: 0 additions & 316 deletions
Original file line numberDiff line numberDiff line change
@@ -227,20 +227,6 @@ def _generate_images(
227227
height: Optional[int] = None,
228228
aspect_ratio: Optional[Literal["1:1", "9:16", "16:9", "4:3", "3:4"]] = None,
229229
guidance_scale: Optional[float] = None,
230-
base_image: Optional["Image"] = None,
231-
mask: Optional["Image"] = None,
232-
edit_mode: Optional[
233-
Literal[
234-
"inpainting-insert",
235-
"inpainting-remove",
236-
"outpainting",
237-
"product-image",
238-
]
239-
] = None,
240-
mask_mode: Optional[Literal["background", "foreground", "semantic"]] = None,
241-
segmentation_classes: Optional[List[str]] = None,
242-
mask_dilation: Optional[float] = None,
243-
product_position: Optional[Literal["fixed", "reposition"]] = None,
244230
output_mime_type: Optional[Literal["image/png", "image/jpeg"]] = None,
245231
compression_quality: Optional[float] = None,
246232
language: Optional[str] = None,
@@ -267,30 +253,6 @@ def _generate_images(
267253
guidance_scale: Controls the strength of the prompt. Suggested values
268254
are - * 0-9 (low strength) * 10-20 (medium strength) * 21+ (high
269255
strength)
270-
base_image: Base image to use for the image generation.
271-
mask: Mask for the base image.
272-
edit_mode: Describes the editing mode for the request. Supported values
273-
are - * inpainting-insert: fills the mask area based on the text
274-
prompt (requires mask and text) * inpainting-remove: removes the
275-
object(s) in the mask area. (requires mask)
276-
* outpainting: extend the image based on the mask area. (Requires
277-
mask) * product-image: Changes the background for the predominant
278-
product or subject in the image
279-
mask_mode: Solicits generation of the mask (vs. providing a mask as an
280-
input). Supported values are:
281-
* background: Automatically generates a mask for all regions except
282-
the primary subject(s) of the image
283-
* foreground: Automatically generates a mask for the primary
284-
subject(s) of the image.
285-
* semantic: Segment one or more of the segmentation classes using
286-
class ID
287-
segmentation_classes: List of class IDs for segmentation. Max of 5 IDs
288-
mask_dilation: Defines the dilation percentage of the mask provided.
289-
Float between 0 and 1. Defaults to 0.03
290-
product_position: Defines whether the product should stay fixed or be
291-
repositioned. Supported Values:
292-
* fixed: Fixed position
293-
* reposition: Can be moved (default)
294256
output_mime_type: Which image format should the output be saved as.
295257
Supported values: * image/png: Save as a PNG image * image/jpeg: Save
296258
as a JPEG image
@@ -325,24 +287,6 @@ class ID
325287
"number_of_images_in_batch": number_of_images,
326288
}
327289

328-
if base_image:
329-
instance["image"] = {
330-
"bytesBase64Encoded": base_image._as_base64_string() # pylint: disable=protected-access
331-
}
332-
shared_generation_parameters["base_image_hash"] = hashlib.sha1(
333-
base_image._image_bytes # pylint: disable=protected-access
334-
).hexdigest()
335-
336-
if mask:
337-
instance["mask"] = {
338-
"image": {
339-
"bytesBase64Encoded": mask._as_base64_string() # pylint: disable=protected-access
340-
},
341-
}
342-
shared_generation_parameters["mask_hash"] = hashlib.sha1(
343-
mask._image_bytes # pylint: disable=protected-access
344-
).hexdigest()
345-
346290
parameters = {}
347291
max_size = max(width or 0, height or 0) or None
348292
if aspect_ratio is not None:
@@ -366,29 +310,6 @@ class ID
366310
parameters["language"] = language
367311
shared_generation_parameters["language"] = language
368312

369-
parameters["editConfig"] = {}
370-
if edit_mode is not None:
371-
parameters["editConfig"]["editMode"] = edit_mode
372-
shared_generation_parameters["edit_mode"] = edit_mode
373-
374-
if mask is None and edit_mode != "product-image":
375-
parameters["editConfig"]["maskMode"] = {}
376-
if mask_mode is not None:
377-
parameters["editConfig"]["maskMode"]["maskType"] = mask_mode
378-
shared_generation_parameters["mask_mode"] = mask_mode
379-
380-
if segmentation_classes is not None:
381-
parameters["editConfig"]["maskMode"]["classes"] = segmentation_classes
382-
shared_generation_parameters["classes"] = segmentation_classes
383-
384-
if mask_dilation is not None:
385-
parameters["editConfig"]["maskDilation"] = mask_dilation
386-
shared_generation_parameters["mask_dilation"] = mask_dilation
387-
388-
if product_position is not None:
389-
parameters["editConfig"]["productPosition"] = product_position
390-
shared_generation_parameters["product_position"] = product_position
391-
392313
parameters["outputOptions"] = {}
393314
if output_mime_type is not None:
394315
parameters["outputOptions"]["mimeType"] = output_mime_type
@@ -489,243 +410,6 @@ def generate_images(
489410
person_generation=person_generation,
490411
)
491412

492-
def edit_image(
493-
self,
494-
*,
495-
prompt: str,
496-
base_image: "Image",
497-
mask: Optional["Image"] = None,
498-
negative_prompt: Optional[str] = None,
499-
number_of_images: int = 1,
500-
guidance_scale: Optional[float] = None,
501-
edit_mode: Optional[
502-
Literal["inpainting-insert", "inpainting-remove", "outpainting", "product-image"]
503-
] = None,
504-
mask_mode: Optional[Literal["background", "foreground", "semantic"]] = None,
505-
segmentation_classes: Optional[List[str]] = None,
506-
mask_dilation: Optional[float] = None,
507-
product_position: Optional[Literal["fixed", "reposition"]] = None,
508-
output_mime_type: Optional[Literal["image/png", "image/jpeg"]] = None,
509-
compression_quality: Optional[float] = None,
510-
language: Optional[str] = None,
511-
safety_filter_level: Optional[
512-
Literal["block_most", "block_some", "block_few", "block_fewest"]
513-
] = None,
514-
person_generation: Optional[Literal["dont_allow", "allow_adult", "allow_all"]] = None,
515-
) -> "ImageGenerationResponse":
516-
"""Edits an existing image based on text prompt.
517-
518-
Args:
519-
prompt: Text prompt for the image.
520-
base_image: Base image from which to generate the new image.
521-
mask: Mask for the base image.
522-
negative_prompt: A description of what you want to omit in
523-
the generated images.
524-
number_of_images: Number of images to generate. Range: 1..8.
525-
guidance_scale: Controls the strength of the prompt.
526-
Suggested values are:
527-
* 0-9 (low strength)
528-
* 10-20 (medium strength)
529-
* 21+ (high strength)
530-
edit_mode: Describes the editing mode for the request. Supported values are:
531-
* inpainting-insert: fills the mask area based on the text prompt
532-
(requires mask and text)
533-
* inpainting-remove: removes the object(s) in the mask area.
534-
(requires mask)
535-
* outpainting: extend the image based on the mask area.
536-
(Requires mask)
537-
* product-image: Changes the background for the predominant product
538-
or subject in the image
539-
mask_mode: Solicits generation of the mask (vs. providing a mask as an
540-
input). Supported values are:
541-
* background: Automatically generates a mask for all regions except
542-
the primary subject(s) of the image
543-
* foreground: Automatically generates a mask for the primary
544-
subject(s) of the image.
545-
* semantic: Segment one or more of the segmentation classes using
546-
class ID
547-
segmentation_classes: List of class IDs for segmentation. Max of 5 IDs
548-
mask_dilation: Defines the dilation percentage of the mask provided.
549-
Float between 0 and 1. Defaults to 0.03
550-
product_position: Defines whether the product should stay fixed or be
551-
repositioned. Supported Values:
552-
* fixed: Fixed position
553-
* reposition: Can be moved (default)
554-
output_mime_type: Which image format should the output be saved as.
555-
Supported values:
556-
* image/png: Save as a PNG image
557-
* image/jpeg: Save as a JPEG image
558-
compression_quality: Level of compression if the output mime type is
559-
selected to be image/jpeg. Float between 0 to 100
560-
language: Language of the text prompt for the image. Default: None.
561-
Supported values are `"en"` for English, `"hi"` for Hindi,
562-
`"ja"` for Japanese, `"ko"` for Korean, and `"auto"` for
563-
automatic language detection.
564-
safety_filter_level: Adds a filter level to Safety filtering. Supported
565-
values are:
566-
* "block_most" : Strongest filtering level, most strict
567-
blocking
568-
* "block_some" : Block some problematic prompts and responses
569-
* "block_few" : Block fewer problematic prompts and responses
570-
* "block_fewest" : Block very few problematic prompts and responses
571-
person_generation: Allow generation of people by the model. Supported
572-
values are:
573-
* "dont_allow" : Block generation of people
574-
* "allow_adult" : Generate adults, but not children
575-
* "allow_all" : Generate adults and children
576-
577-
Returns:
578-
An `ImageGenerationResponse` object.
579-
"""
580-
return self._generate_images(
581-
prompt=prompt,
582-
negative_prompt=negative_prompt,
583-
number_of_images=number_of_images,
584-
guidance_scale=guidance_scale,
585-
base_image=base_image,
586-
mask=mask,
587-
edit_mode=edit_mode,
588-
mask_mode=mask_mode,
589-
segmentation_classes=segmentation_classes,
590-
mask_dilation=mask_dilation,
591-
product_position=product_position,
592-
output_mime_type=output_mime_type,
593-
compression_quality=compression_quality,
594-
language=language,
595-
safety_filter_level=safety_filter_level,
596-
person_generation=person_generation,
597-
)
598-
599-
def upscale_image(
600-
self,
601-
image: Union["Image", "GeneratedImage"],
602-
new_size: Optional[int] = None,
603-
upscale_factor: Optional[Literal["x2", "x4"]] = None,
604-
output_mime_type: Optional[Literal["image/png", "image/jpeg"]] = "image/png",
605-
output_compression_quality: Optional[int] = None,
606-
) -> "Image":
607-
"""Upscales an image.
608-
609-
This supports upscaling images generated through the `generate_images()`
610-
method, or upscaling a new image.
611-
612-
Examples::
613-
614-
# Upscale a generated image
615-
model = ImageGenerationModel.from_pretrained("imagegeneration@002")
616-
response = model.generate_images(
617-
prompt="Astronaut riding a horse",
618-
)
619-
model.upscale_image(image=response[0])
620-
621-
# Upscale a new 1024x1024 image
622-
my_image = Image.load_from_file("my-image.png")
623-
model.upscale_image(image=my_image)
624-
625-
# Upscale a new arbitrary sized image using a x2 or x4 upscaling factor
626-
my_image = Image.load_from_file("my-image.png")
627-
model.upscale_image(image=my_image, upscale_factor="x2")
628-
629-
# Upscale an image and get the result in JPEG format
630-
my_image = Image.load_from_file("my-image.png")
631-
model.upscale_image(image=my_image, output_mime_type="image/jpeg",
632-
output_compression_quality=90)
633-
634-
Args:
635-
image (Union[GeneratedImage, Image]): Required. The generated image
636-
to upscale.
637-
new_size (int): The size of the biggest dimension of the upscaled
638-
image.
639-
Only 2048 and 4096 are currently supported. Results in a
640-
2048x2048 or 4096x4096 image. Defaults to 2048 if not provided.
641-
upscale_factor: The upscaling factor. Supported values are "x2" and
642-
"x4". Defaults to None.
643-
output_mime_type: The mime type of the output image. Supported values
644-
are "image/png" and "image/jpeg". Defaults to "image/png".
645-
output_compression_quality: The compression quality of the output
646-
image
647-
as an int (0-100). Only applicable if the output mime type is
648-
"image/jpeg". Defaults to None.
649-
650-
Returns:
651-
An `Image` object.
652-
"""
653-
if self._client is None:
654-
self._client = client.get_default_prediction_client()
655-
656-
target_image_size = new_size if new_size else None
657-
longest_dim = max(image._size[0], image._size[1])
658-
659-
if not new_size and not upscale_factor:
660-
raise ValueError("Either new_size or upscale_factor must be provided.")
661-
662-
if not upscale_factor:
663-
x2_factor = 2.0
664-
x4_factor = 4.0
665-
epsilon = 0.1
666-
is_upscaling_x2_request = abs(new_size / longest_dim - x2_factor) < epsilon
667-
is_upscaling_x4_request = abs(new_size / longest_dim - x4_factor) < epsilon
668-
669-
if not is_upscaling_x2_request and not is_upscaling_x4_request:
670-
raise ValueError(
671-
"Only x2 and x4 upscaling are currently supported. Requested"
672-
f" upscaling factor: {new_size / longest_dim}"
673-
)
674-
else:
675-
if upscale_factor == "x2":
676-
target_image_size = longest_dim * 2
677-
else:
678-
target_image_size = longest_dim * 4
679-
if new_size not in _SUPPORTED_UPSCALING_SIZES:
680-
raise ValueError(
681-
"Only the folowing square upscaling sizes are currently supported:"
682-
f" {_SUPPORTED_UPSCALING_SIZES}."
683-
)
684-
685-
instance = {"prompt": ""}
686-
687-
instance["image"] = {
688-
"bytesBase64Encoded": image._as_base64_string() # pylint: disable=protected-access
689-
}
690-
691-
parameters = {
692-
"sampleCount": 1,
693-
"mode": "upscale",
694-
}
695-
696-
if upscale_factor:
697-
parameters["upscaleConfig"] = {"upscaleFactor": upscale_factor}
698-
699-
else:
700-
parameters["sampleImageSize"] = str(new_size)
701-
702-
parameters["outputOptions"] = {"mimeType": output_mime_type}
703-
if output_mime_type == "image/jpeg" and output_compression_quality is not None:
704-
parameters["outputOptions"]["compressionQuality"] = output_compression_quality
705-
706-
707-
pr = protos.PredictRequest.pb()
708-
request = pr(
709-
model=self.model_name, instances=[to_value(instance)], parameters=to_value(parameters)
710-
)
711-
response = self._client.predict(request)
712-
713-
upscaled_image = response.predictions[0]
714-
715-
if isinstance(image, GeneratedImage):
716-
generation_parameters = image.generation_parameters
717-
718-
else:
719-
generation_parameters = {}
720-
721-
generation_parameters["upscaled_image_size"] = target_image_size
722-
723-
encoded_bytes = upscaled_image.get("bytesBase64Encoded")
724-
return GeneratedImage(
725-
image_bytes=base64.b64decode(encoded_bytes) if encoded_bytes else None,
726-
generation_parameters=generation_parameters,
727-
)
728-
729413

730414
@dataclasses.dataclass
731415
class ImageGenerationResponse:

0 commit comments

Comments
 (0)