@@ -34,13 +34,75 @@ def remote_decode(
3434 processor : Optional [Union ["VaeImageProcessor" , "VideoProcessor" ]] = None ,
3535 do_scaling : bool = True ,
3636 output_type : Literal ["mp4" , "pil" , "pt" ] = "pil" ,
37+ return_type : Literal ["mp4" , "pil" , "pt" ] = "pil" ,
3738 image_format : Literal ["png" , "jpg" ] = "jpg" ,
3839 partial_postprocess : bool = False ,
3940 input_tensor_type : Literal ["base64" , "binary" ] = "base64" ,
4041 output_tensor_type : Literal ["base64" , "binary" ] = "base64" ,
4142 height : Optional [int ] = None ,
4243 width : Optional [int ] = None ,
4344) -> Union [Image .Image , List [Image .Image ], bytes , "torch.Tensor" ]:
45+ """
46+ Args:
47+ endpoint (`str`):
48+ Endpoint for Remote Decode.
49+ tensor (`torch.Tensor`):
50+ Tensor to be decoded.
51+ processor (`VaeImageProcessor` or `VideoProcessor`, *optional*):
52+ Used with `return_type="pt"`, and `return_type="pil"` for Video models.
53+ do_scaling (`bool`, default `True`, *optional*):
54+ When `True` scaling e.g. `latents / self.vae.config.scaling_factor` is
55+ applied remotely. If `False`, input must be passed with scaling applied.
56+ output_type (`"mp4"` or `"pil"` or `"pt"`, default `"pil"`):
57+ **Endpoint** output type. Subject to change. Report feedback on preferred type.
58+
59+ `"mp4"`: Supported by video models. Endpoint returns `bytes` of video.
60+ `"pil"`: Supported by image and video models.
61+ Image models: Endpoint returns `bytes` of an image in `image_format`.
62+ Video models: Endpoint returns `torch.Tensor` with partial `postprocessing` applied.
63+ Requires `processor` as a flag (any non-`None` value will work).
64+ `"pt"`: Supported by image and video models. Endpoint returns `torch.Tensor`.
65+ With `partial_postprocess=True` the tensor is a postprocessed `uint8` image tensor.
66+
67+ Recommendations:
68+ `"pt"` with `partial_postprocess=True` is the smallest transfer for full quality.
69+ `"pt"` with `partial_postprocess=False` is the most compatible with third party code.
70+ `"pil"` with `image_format="jpg"` is the smallest transfer overall.
71+
72+ return_type (`"mp4"` or `"pil"` or `"pt"`, default `"pil"`):
73+ **Function** return type.
74+
75+ `"mp4"`: Function returns `bytes` of video.
76+ `"pil"`: Function returns `PIL.Image.Image`.
77+ With `output_type="pil"` no further processing is applied.
78+ With `output_type="pt"` a `PIL.Image.Image` is created.
79+ `partial_postprocess=False` `processor` is required.
80+ `partial_postprocess=True` `processor` is **not** required.
81+ `"pt"`: Function returns `torch.Tensor`.
82+ `processor` is **not** required.
83+ `partial_postprocess=False` tensor is `float16` or `bfloat16`, without denormalization.
84+ `partial_postprocess=True` tensor is `uint8`, denormalized.
85+
86+ image_format (`"png"` or `"jpg"`, default `"jpg"`):
87+ Used with `output_type="pil"`. Endpoint returns `jpg` or `png`.
88+
89+ partial_postprocess (`bool`, default `False`):
90+ Used with `output_type="pt"`.
91+ `partial_postprocess=False` tensor is `float16` or `bfloat16`, without denormalization.
92+ `partial_postprocess=True` tensor is `uint8`, denormalized.
93+
94+ input_tensor_type (`"base64"` or `"binary"`, default `"base64"`):
95+ With `"base64"` `tensor` is sent to endpoint base64 encoded. `"binary"` reduces overhead and transfer.
96+
97+ output_tensor_type (`"base64"` or `"binary"`, default `"base64"`):
98+ With `"base64"` `tensor` returned by endpoint is base64 encoded. `"binary"` reduces overhead and transfer.
99+
100+ height (`int`, *optional*):
101+ Required for `"packed"` latents.
102+
103+ width (`int`, *optional*):
104+ Required for `"packed"` latents.
105+ """
44106 if tensor .ndim == 3 and height is None and width is None :
45107 raise ValueError ("`height` and `width` required for packed latents." )
46108 if output_type == "pt" and partial_postprocess is False and processor is None :
0 commit comments