8
8
from diffusers .schedulers .scheduling_utils import SchedulerMixin , SchedulerOutput
9
9
10
10
if TYPE_CHECKING :
11
- from invokeai .backend .stable_diffusion .diffusion .conditioning_data import TextConditioningData
11
+ from invokeai .backend .stable_diffusion .diffusion .conditioning_data import ConditioningMode , TextConditioningData
12
12
13
13
14
14
@dataclass
@@ -31,92 +31,101 @@ class UNetKwargs:
31
31
32
32
@dataclass
33
33
class DenoiseInputs :
34
- """Initial variables passed to denoise. Supposed to be unchanged.
35
-
36
- Variables:
37
- orig_latents: The latent-space image to denoise.
38
- Shape: [batch, channels, latent_height, latent_width]
39
- - If we are inpainting, this is the initial latent image before noise has been added.
40
- - If we are generating a new image, this should be initialized to zeros.
41
- - In some cases, this may be a partially-noised latent image (e.g. when running the SDXL refiner).
42
- scheduler_step_kwargs: kwargs forwarded to the scheduler.step() method.
43
- conditioning_data: Text conditionging data.
44
- noise: Noise used for two purposes:
45
- Shape: [1 or batch, channels, latent_height, latent_width]
46
- 1. Used by the scheduler to noise the initial `latents` before denoising.
47
- 2. Used to noise the `masked_latents` when inpainting.
48
- `noise` should be None if the `latents` tensor has already been noised.
49
- seed: The seed used to generate the noise for the denoising process.
50
- HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
51
- same noise used earlier in the pipeline. This should really be handled in a clearer way.
52
- timesteps: The timestep schedule for the denoising process.
53
- init_timestep: The first timestep in the schedule. This is used to determine the initial noise level, so
54
- should be populated if you want noise applied *even* if timesteps is empty.
55
- attention_processor_cls: Class of attention processor that is used.
56
- """
34
+ """Initial variables passed to denoise. Supposed to be unchanged."""
57
35
36
+ # The latent-space image to denoise.
37
+ # Shape: [batch, channels, latent_height, latent_width]
38
+ # - If we are inpainting, this is the initial latent image before noise has been added.
39
+ # - If we are generating a new image, this should be initialized to zeros.
40
+ # - In some cases, this may be a partially-noised latent image (e.g. when running the SDXL refiner).
58
41
orig_latents : torch .Tensor
42
+
43
+ # kwargs forwarded to the scheduler.step() method.
59
44
scheduler_step_kwargs : dict [str , Any ]
45
+
46
+ # Text conditioning data.
60
47
conditioning_data : TextConditioningData
48
+
49
+ # Noise used for two purposes:
50
+ # 1. Used by the scheduler to noise the initial `latents` before denoising.
51
+ # 2. Used to noise the `masked_latents` when inpainting.
52
+ # `noise` should be None if the `latents` tensor has already been noised.
53
+ # Shape: [1 or batch, channels, latent_height, latent_width]
61
54
noise : Optional [torch .Tensor ]
55
+
56
+ # The seed used to generate the noise for the denoising process.
57
+ # HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
58
+ # same noise used earlier in the pipeline. This should really be handled in a clearer way.
62
59
seed : int
60
+
61
+ # The timestep schedule for the denoising process.
63
62
timesteps : torch .Tensor
63
+
64
+ # The first timestep in the schedule. This is used to determine the initial noise level, so
65
+ # should be populated if you want noise applied *even* if timesteps is empty.
64
66
init_timestep : torch .Tensor
67
+
68
+ # Class of attention processor that is used.
65
69
attention_processor_cls : Type [Any ]
66
70
67
71
68
72
@dataclass
69
73
class DenoiseContext :
70
- """Context with all variables in denoise
71
-
72
- Variables:
73
- inputs: Initial variables passed to denoise. Supposed to be unchanged.
74
- scheduler: Scheduler which used to apply noise predictions.
75
- unet: UNet model.
76
- latents: Current state of latent-space image in denoising process.
77
- None until `pre_denoise_loop` callback.
78
- Shape: [batch, channels, latent_height, latent_width]
79
- step_index: Current denoising step index.
80
- None until `pre_step` callback.
81
- timestep: Current denoising step timestep.
82
- None until `pre_step` callback.
83
- unet_kwargs: Arguments which will be passed to U Net model.
84
- Available in `pre_unet`/`post_unet` callbacks, otherwice will be None.
85
- step_output: SchedulerOutput class returned from step function(normally, generated by scheduler).
86
- Supposed to be used only in `post_step` callback, otherwice can be None.
87
- latent_model_input: Scaled version of `latents`, which will be passed to unet_kwargs initialization.
88
- Available in events inside step(between `pre_step` and `post_stop`).
89
- Shape: [batch, channels, latent_height, latent_width]
90
- conditioning_mode: [TMP] Defines on which conditionings current unet call will be runned.
91
- Available in `pre_unet`/`post_unet` callbacks, otherwice will be None.
92
- Can be "negative", "positive" or "both"
93
- negative_noise_pred: [TMP] Noise predictions from negative conditioning.
94
- Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwice will be None.
95
- Shape: [batch, channels, latent_height, latent_width]
96
- positive_noise_pred: [TMP] Noise predictions from positive conditioning.
97
- Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwice will be None.
98
- Shape: [batch, channels, latent_height, latent_width]
99
- noise_pred: Combined noise prediction from passed conditionings.
100
- Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwice will be None.
101
- Shape: [batch, channels, latent_height, latent_width]
102
- extra: Dictionary for extensions to pass extra info about denoise process to other extensions.
103
- """
74
+ """Context with all variables in denoise"""
104
75
76
+ # Initial variables passed to denoise. Supposed to be unchanged.
105
77
inputs : DenoiseInputs
106
78
79
+ # Scheduler which is used to apply noise predictions.
107
80
scheduler : SchedulerMixin
81
+
82
+ # UNet model.
108
83
unet : Optional [UNet2DConditionModel ] = None
109
84
85
+ # Current state of latent-space image in denoising process.
86
+ # None until `pre_denoise_loop` callback.
87
+ # Shape: [batch, channels, latent_height, latent_width]
110
88
latents : Optional [torch .Tensor ] = None
89
+
90
+ # Current denoising step index.
91
+ # None until `pre_step` callback.
111
92
step_index : Optional [int ] = None
93
+
94
+ # Current denoising step timestep.
95
+ # None until `pre_step` callback.
112
96
timestep : Optional [torch .Tensor ] = None
97
+
98
+ # Arguments which will be passed to UNet model.
99
+ # Available in `pre_unet`/`post_unet` callbacks, otherwise will be None.
113
100
unet_kwargs : Optional [UNetKwargs ] = None
101
+
102
+ # SchedulerOutput object returned from the step function (normally generated by the scheduler).
103
+ # Supposed to be used only in `post_step` callback, otherwise can be None.
114
104
step_output : Optional [SchedulerOutput ] = None
115
105
106
+ # Scaled version of `latents`, which will be passed to unet_kwargs initialization.
107
+ # Available in events inside a step (between `pre_step` and `post_step`).
108
+ # Shape: [batch, channels, latent_height, latent_width]
116
109
latent_model_input : Optional [torch .Tensor ] = None
117
- conditioning_mode : Optional [str ] = None
110
+
111
+ # [TMP] Defines which conditionings the current unet call will be run on.
112
+ # Available in `pre_unet`/`post_unet` callbacks, otherwise will be None.
113
+ conditioning_mode : Optional [ConditioningMode ] = None
114
+
115
+ # [TMP] Noise predictions from negative conditioning.
116
+ # Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwise will be None.
117
+ # Shape: [batch, channels, latent_height, latent_width]
118
118
negative_noise_pred : Optional [torch .Tensor ] = None
119
+
120
+ # [TMP] Noise predictions from positive conditioning.
121
+ # Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwise will be None.
122
+ # Shape: [batch, channels, latent_height, latent_width]
119
123
positive_noise_pred : Optional [torch .Tensor ] = None
124
+
125
+ # Combined noise prediction from passed conditionings.
126
+ # Available in `apply_cfg` and `post_apply_cfg` callbacks, otherwise will be None.
127
+ # Shape: [batch, channels, latent_height, latent_width]
120
128
noise_pred : Optional [torch .Tensor ] = None
121
129
130
+ # Dictionary for extensions to pass extra info about denoise process to other extensions.
122
131
extra : dict = field (default_factory = dict )
0 commit comments