|
19 | 19 | import torch |
20 | 20 |
|
21 | 21 | from ...configuration_utils import FrozenDict |
| 22 | +from ...guiders import ClassifierFreeGuidance |
22 | 23 | from ...image_processor import VaeImageProcessor |
23 | | -from ...models import AutoencoderKL, ControlNetModel, ControlNetUnionModel |
| 24 | +from ...models import AutoencoderKL, ControlNetModel, ControlNetUnionModel, UNet2DConditionModel |
24 | 25 | from ...pipelines.controlnet.multicontrolnet import MultiControlNetModel |
25 | 26 | from ...schedulers import EulerDiscreteScheduler |
26 | 27 | from ...utils import logging |
@@ -266,37 +267,37 @@ def intermediate_outputs(self) -> List[str]: |
266 | 267 | OutputParam( |
267 | 268 | "prompt_embeds", |
268 | 269 | type_hint=torch.Tensor, |
269 | | - kwargs_type="guider_input_fields", |
| 270 | + kwargs_type="guider_input_fields", # already in the intermediates state, but declared here again for guider_input_fields
270 | 271 | description="text embeddings used to guide the image generation", |
271 | 272 | ), |
272 | 273 | OutputParam( |
273 | 274 | "negative_prompt_embeds", |
274 | 275 | type_hint=torch.Tensor, |
275 | | - kwargs_type="guider_input_fields", |
| 276 | + kwargs_type="guider_input_fields", # already in the intermediates state, but declared here again for guider_input_fields
276 | 277 | description="negative text embeddings used to guide the image generation", |
277 | 278 | ), |
278 | 279 | OutputParam( |
279 | 280 | "pooled_prompt_embeds", |
280 | 281 | type_hint=torch.Tensor, |
281 | | - kwargs_type="guider_input_fields", |
| 282 | + kwargs_type="guider_input_fields", # already in the intermediates state, but declared here again for guider_input_fields
282 | 283 | description="pooled text embeddings used to guide the image generation", |
283 | 284 | ), |
284 | 285 | OutputParam( |
285 | 286 | "negative_pooled_prompt_embeds", |
286 | 287 | type_hint=torch.Tensor, |
287 | | - kwargs_type="guider_input_fields", |
| 288 | + kwargs_type="guider_input_fields", # already in the intermediates state, but declared here again for guider_input_fields
288 | 289 | description="negative pooled text embeddings used to guide the image generation", |
289 | 290 | ), |
290 | 291 | OutputParam( |
291 | 292 | "ip_adapter_embeds", |
292 | 293 | type_hint=List[torch.Tensor], |
293 | | - kwargs_type="guider_input_fields", |
| 294 | + kwargs_type="guider_input_fields", # already in the intermediates state, but declared here again for guider_input_fields
294 | 295 | description="image embeddings for IP-Adapter", |
295 | 296 | ), |
296 | 297 | OutputParam( |
297 | 298 | "negative_ip_adapter_embeds", |
298 | 299 | type_hint=List[torch.Tensor], |
299 | | - kwargs_type="guider_input_fields", |
| 300 | + kwargs_type="guider_input_fields", # already in the intermediates state, but declared here again for guider_input_fields
300 | 301 | description="negative image embeddings for IP-Adapter", |
301 | 302 | ), |
302 | 303 | ] |
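Aside: `kwargs_type="guider_input_fields"` tags these outputs so they can be collected and paired for the guider. A minimal illustrative sketch of that conditional/unconditional pairing follows; the `collect_guider_inputs` helper is hypothetical, not the library's actual collection code:

```python
from typing import Dict, Tuple

import torch


def collect_guider_inputs(
    state: Dict[str, torch.Tensor],
) -> Dict[str, Tuple[torch.Tensor, torch.Tensor]]:
    # Pair each conditional tensor with its "negative_" counterpart so a
    # guider (e.g. classifier-free guidance) can run cond/uncond passes.
    pairs = {}
    for name, value in state.items():
        if name.startswith("negative_"):
            continue
        negative = state.get(f"negative_{name}")
        if negative is not None:
            pairs[name] = (value, negative)
    return pairs


fields = collect_guider_inputs(
    {
        "prompt_embeds": torch.randn(1, 77, 2048),
        "negative_prompt_embeds": torch.randn(1, 77, 2048),
    }
)
# fields["prompt_embeds"] is the (cond, uncond) pair a CFG guider consumes
```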
@@ -683,12 +684,6 @@ def intermediate_outputs(self) -> List[str]: |
683 | 684 | OutputParam( |
684 | 685 | "latents", type_hint=torch.Tensor, description="The initial latents to use for the denoising process" |
685 | 686 | ), |
686 | | - OutputParam("mask", type_hint=torch.Tensor, description="The mask to use for inpainting generation"), |
687 | | - OutputParam( |
688 | | - "masked_image_latents", |
689 | | - type_hint=torch.Tensor, |
690 | | - description="The masked image latents to use for the inpainting generation (only for inpainting-specific unet)", |
691 | | - ), |
692 | 687 | OutputParam( |
693 | 688 | "noise", |
694 | 689 | type_hint=torch.Tensor, |
@@ -993,6 +988,7 @@ class StableDiffusionXLPrepareLatentsStep(PipelineBlock): |
993 | 988 | def expected_components(self) -> List[ComponentSpec]: |
994 | 989 | return [ |
995 | 990 | ComponentSpec("scheduler", EulerDiscreteScheduler), |
| 991 | + ComponentSpec("vae", AutoencoderKL), |
996 | 992 | ] |
997 | 993 |
|
998 | 994 | @property |
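Declaring `vae` as an expected component fits this step if it encodes an input image into latents. A minimal sketch of that standard encode path, assuming the usual SDXL VAE checkpoint layout (model id and tensor shapes are illustrative):

```python
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="vae"
)
# Placeholder image tensor, normalized to [-1, 1]
image = torch.randn(1, 3, 1024, 1024)

with torch.no_grad():
    # Encode to the latent space and apply the VAE scaling factor, the
    # usual way image latents are prepared for img2img/inpainting.
    latents = vae.encode(image).latent_dist.sample()
    latents = latents * vae.config.scaling_factor
```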
@@ -1105,6 +1101,18 @@ def expected_configs(self) -> List[ConfigSpec]: |
1105 | 1101 | ConfigSpec("requires_aesthetics_score", False), |
1106 | 1102 | ] |
1107 | 1103 |
|
| 1104 | + @property |
| 1105 | + def expected_components(self) -> List[ComponentSpec]: |
| 1106 | + return [ |
| 1107 | + ComponentSpec("unet", UNet2DConditionModel), |
| 1108 | + ComponentSpec( |
| 1109 | + "guider", |
| 1110 | + ClassifierFreeGuidance, |
| 1111 | + config=FrozenDict({"guidance_scale": 7.5}), |
| 1112 | + default_creation_method="from_config", |
| 1113 | + ), |
| 1114 | + ] |
| 1115 | + |
1108 | 1116 | @property |
1109 | 1117 | def description(self) -> str: |
1110 | 1118 | return "Step that prepares the additional conditioning for the image-to-image/inpainting generation process" |
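As a sanity check on the new spec: with `default_creation_method="from_config"`, the guider is built from its config rather than loaded from pretrained weights. A minimal sketch of the equivalent direct construction, assuming `ClassifierFreeGuidance` accepts `guidance_scale` in its constructor as the `FrozenDict` config suggests:

```python
from diffusers.guiders import ClassifierFreeGuidance

# Mirrors ComponentSpec(..., config=FrozenDict({"guidance_scale": 7.5}),
# default_creation_method="from_config"): no weights are loaded.
guider = ClassifierFreeGuidance(guidance_scale=7.5)
```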
@@ -1315,6 +1323,18 @@ class StableDiffusionXLPrepareAdditionalConditioningStep(PipelineBlock): |
1315 | 1323 | def description(self) -> str: |
1316 | 1324 | return "Step that prepares the additional conditioning for the text-to-image generation process" |
1317 | 1325 |
|
| 1326 | + @property |
| 1327 | + def expected_components(self) -> List[ComponentSpec]: |
| 1328 | + return [ |
| 1329 | + ComponentSpec("unet", UNet2DConditionModel), |
| 1330 | + ComponentSpec( |
| 1331 | + "guider", |
| 1332 | + ClassifierFreeGuidance, |
| 1333 | + config=FrozenDict({"guidance_scale": 7.5}), |
| 1334 | + default_creation_method="from_config", |
| 1335 | + ), |
| 1336 | + ] |
| 1337 | + |
1318 | 1338 | @property |
1319 | 1339 | def inputs(self) -> List[Tuple[str, Any]]: |
1320 | 1340 | return [ |
|