
Commit 0496a69

up

1 parent 0484e77 commit 0496a69

File tree: 5 files changed, 33 additions and 48 deletions

src/diffusers/modular_pipelines/flux/before_denoise.py

Lines changed: 20 additions & 21 deletions
@@ -105,26 +105,26 @@ def calculate_shift(

 # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline._pack_latents
 def _pack_latents(latents, batch_size, num_channels_latents, height, width):
-    latents = latents.view(batch_size, num_channels_latents, height // 2, 2, width // 2, 2)
-    latents = latents.permute(0, 2, 4, 1, 3, 5)
-    latents = latents.reshape(batch_size, (height // 2) * (width // 2), num_channels_latents * 4)
+    latents = latents.view(batch_size, num_channels_latents, height // 2, 2, width // 2, 2)
+    latents = latents.permute(0, 2, 4, 1, 3, 5)
+    latents = latents.reshape(batch_size, (height // 2) * (width // 2), num_channels_latents * 4)

-    return latents
+    return latents


 # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline._prepare_latent_image_ids
 def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
-    latent_image_ids = torch.zeros(height, width, 3)
-    latent_image_ids[..., 1] = latent_image_ids[..., 1] + torch.arange(height)[:, None]
-    latent_image_ids[..., 2] = latent_image_ids[..., 2] + torch.arange(width)[None, :]
+    latent_image_ids = torch.zeros(height, width, 3)
+    latent_image_ids[..., 1] = latent_image_ids[..., 1] + torch.arange(height)[:, None]
+    latent_image_ids[..., 2] = latent_image_ids[..., 2] + torch.arange(width)[None, :]

-    latent_image_id_height, latent_image_id_width, latent_image_id_channels = latent_image_ids.shape
+    latent_image_id_height, latent_image_id_width, latent_image_id_channels = latent_image_ids.shape

-    latent_image_ids = latent_image_ids.reshape(
-        latent_image_id_height * latent_image_id_width, latent_image_id_channels
-    )
+    latent_image_ids = latent_image_ids.reshape(
+        latent_image_id_height * latent_image_id_width, latent_image_id_channels
+    )

-    return latent_image_ids.to(device=device, dtype=dtype)
+    return latent_image_ids.to(device=device, dtype=dtype)


 class FluxInputStep(PipelineBlock):
@@ -180,13 +180,11 @@ def intermediate_outputs(self) -> List[str]:
             OutputParam(
                 "prompt_embeds",
                 type_hint=torch.Tensor,
-                # kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
                 description="text embeddings used to guide the image generation",
             ),
             OutputParam(
                 "pooled_prompt_embeds",
                 type_hint=torch.Tensor,
-                # kwargs_type="guider_input_fields", # already in intermedites state but declare here again for guider_input_fields
                 description="pooled text embeddings used to guide the image generation",
             ),
             # TODO: support negative embeddings?
@@ -235,10 +233,10 @@ def description(self) -> str:
     def inputs(self) -> List[InputParam]:
         return [
             InputParam("num_inference_steps", default=50),
-            InputParam("timesteps"),
+            InputParam("timesteps"),
             InputParam("sigmas"),
             InputParam("guidance_scale", default=3.5),
-            InputParam("latents", type_hint=torch.Tensor)
+            InputParam("latents", type_hint=torch.Tensor),
         ]

     @property
@@ -261,7 +259,7 @@ def intermediate_outputs(self) -> List[OutputParam]:
                 type_hint=int,
                 description="The number of denoising steps to perform at inference time",
             ),
-            OutputParam("guidance", type_hint=torch.Tensor, description="Optional guidance to be used.")
+            OutputParam("guidance", type_hint=torch.Tensor, description="Optional guidance to be used."),
         ]

     @torch.no_grad()
@@ -340,10 +338,11 @@ def intermediate_outputs(self) -> List[OutputParam]:
                 "latents", type_hint=torch.Tensor, description="The initial latents to use for the denoising process"
             ),
             OutputParam(
-                "latent_image_ids", type_hint=torch.Tensor, description="IDs computed from the image sequence needed for RoPE"
-            )
+                "latent_image_ids",
+                type_hint=torch.Tensor,
+                description="IDs computed from the image sequence needed for RoPE",
+            ),
         ]
-

     @staticmethod
     def check_inputs(components, block_state):
@@ -417,7 +416,7 @@ def __call__(self, components: FluxModularPipeline, state: PipelineState) -> Pip
             block_state.generator,
             block_state.latents,
         )
-
+
         self.set_block_state(state, block_state)

         return components, state
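Most of the changes above are formatting-only, but the two helpers touched here do the geometric heavy lifting for Flux. A minimal sketch of what they compute, assuming torch is installed and using hypothetical sizes (16 latent channels, a 64x64 latent grid):

import torch

# Hypothetical sizes: a 16-channel, 64x64 latent grid.
batch_size, num_channels_latents, height, width = 1, 16, 64, 64
latents = torch.randn(batch_size, num_channels_latents, height, width)

# _pack_latents folds each 2x2 spatial patch into the channel axis:
# (B, C, H, W) -> (B, (H//2) * (W//2), C * 4).
packed = latents.view(batch_size, num_channels_latents, height // 2, 2, width // 2, 2)
packed = packed.permute(0, 2, 4, 1, 3, 5)
packed = packed.reshape(batch_size, (height // 2) * (width // 2), num_channels_latents * 4)
print(packed.shape)  # torch.Size([1, 1024, 64])

# _prepare_latent_image_ids builds one (zero, row, col) position id per packed
# patch for RoPE; callers pass the halved grid size, hence height // 2 here.
ids = torch.zeros(height // 2, width // 2, 3)
ids[..., 1] += torch.arange(height // 2)[:, None]
ids[..., 2] += torch.arange(width // 2)[None, :]
ids = ids.reshape((height // 2) * (width // 2), 3)
print(ids.shape)  # torch.Size([1024, 3]), one id row per packed patch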

src/diffusers/modular_pipelines/flux/decoders.py

Lines changed: 9 additions & 9 deletions
@@ -31,19 +31,19 @@

 # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline._unpack_latents
 def _unpack_latents(latents, height, width, vae_scale_factor):
-    batch_size, num_patches, channels = latents.shape
+    batch_size, num_patches, channels = latents.shape

-    # VAE applies 8x compression on images but we must also account for packing which requires
-    # latent height and width to be divisible by 2.
-    height = 2 * (int(height) // (vae_scale_factor * 2))
-    width = 2 * (int(width) // (vae_scale_factor * 2))
+    # VAE applies 8x compression on images but we must also account for packing which requires
+    # latent height and width to be divisible by 2.
+    height = 2 * (int(height) // (vae_scale_factor * 2))
+    width = 2 * (int(width) // (vae_scale_factor * 2))

-    latents = latents.view(batch_size, height // 2, width // 2, channels // 4, 2, 2)
-    latents = latents.permute(0, 3, 1, 4, 2, 5)
+    latents = latents.view(batch_size, height // 2, width // 2, channels // 4, 2, 2)
+    latents = latents.permute(0, 3, 1, 4, 2, 5)

-    latents = latents.reshape(batch_size, channels // (2 * 2), height, width)
+    latents = latents.reshape(batch_size, channels // (2 * 2), height, width)

-    return latents
+    return latents


 class FluxDecodeStep(PipelineBlock):
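These hunks are likewise whitespace fixes, but _unpack_latents is the exact inverse of _pack_latents above. A sketch of the shape round trip, assuming torch, a Flux-style vae_scale_factor of 8, and a hypothetical 512x512 target image:

import torch

batch_size = 1
vae_scale_factor = 8            # assumed: Flux's VAE compresses 8x per side
height_px, width_px = 512, 512  # hypothetical target image size

# Packed latents as produced by _pack_latents: (B, num_patches, channels).
packed = torch.randn(batch_size, 1024, 64)
channels = packed.shape[-1]

# Recover a latent grid size that is divisible by 2, as the packing requires.
height = 2 * (int(height_px) // (vae_scale_factor * 2))  # 64
width = 2 * (int(width_px) // (vae_scale_factor * 2))    # 64

# Undo the 2x2 patch folding: (B, N, C*4) -> (B, C, H, W).
unpacked = packed.view(batch_size, height // 2, width // 2, channels // 4, 2, 2)
unpacked = unpacked.permute(0, 3, 1, 4, 2, 5)
unpacked = unpacked.reshape(batch_size, channels // (2 * 2), height, width)
print(unpacked.shape)  # torch.Size([1, 16, 64, 64]), ready for the VAE decoder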

src/diffusers/modular_pipelines/flux/denoise.py

Lines changed: 3 additions & 12 deletions
@@ -19,8 +19,6 @@
 from ...models import FluxTransformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
 from ...utils import logging
-from ...configuration_utils import FrozenDict
-from ...guiders import ClassifierFreeGuidance
 from ..modular_pipeline import (
     BlockState,
     LoopSequentialPipelineBlocks,
@@ -39,9 +37,7 @@ class FluxLoopDenoiser(PipelineBlock):

     @property
     def expected_components(self) -> List[ComponentSpec]:
-        return [
-            ComponentSpec("transformer", FluxTransformer2DModel)
-        ]
+        return [ComponentSpec("transformer", FluxTransformer2DModel)]

     @property
     def description(self) -> str:
@@ -122,9 +118,7 @@ class FluxLoopAfterDenoiser(PipelineBlock):

     @property
     def expected_components(self) -> List[ComponentSpec]:
-        return [
-            ComponentSpec("scheduler", FlowMatchEulerDiscreteScheduler)
-        ]
+        return [ComponentSpec("scheduler", FlowMatchEulerDiscreteScheduler)]

     @property
     def description(self) -> str:
@@ -221,10 +215,7 @@ def __call__(self, components: FluxModularPipeline, state: PipelineState) -> Pip


 class FluxDenoiseStep(FluxDenoiseLoopWrapper):
-    block_classes = [
-        FluxLoopDenoiser,
-        FluxLoopAfterDenoiser
-    ]
+    block_classes = [FluxLoopDenoiser, FluxLoopAfterDenoiser]
     block_names = ["denoiser", "after_denoiser"]

     @property
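The cleanup above collapses the block lists onto single lines without changing behavior: FluxDenoiseStep still runs its "denoiser" and "after_denoiser" sub-blocks in order on every loop iteration. A toy illustration of that loop-wrapper composition pattern (plain Python, not the diffusers API; all names below are hypothetical):

# Toy sketch of the loop-composition pattern: a wrapper that instantiates its
# declared sub-blocks and runs them in order on every denoising iteration.
# Illustration only -- the real FluxDenoiseLoopWrapper carries pipeline state.
class ToyLoopWrapper:
    block_classes = []
    block_names = []

    def __init__(self):
        self.blocks = {name: cls() for name, cls in zip(self.block_names, self.block_classes)}

    def run(self, state, num_steps):
        for step in range(num_steps):
            for name in self.block_names:
                state = self.blocks[name](state, step)
        return state


class ToyDenoiser:
    def __call__(self, state, step):
        state["noise_pred"] = f"prediction at step {step}"
        return state


class ToyAfterDenoiser:
    def __call__(self, state, step):
        state["latents"] = f"scheduler update at step {step}"
        return state


class ToyDenoiseStep(ToyLoopWrapper):
    block_classes = [ToyDenoiser, ToyAfterDenoiser]
    block_names = ["denoiser", "after_denoiser"]


print(ToyDenoiseStep().run(state={}, num_steps=2))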

src/diffusers/modular_pipelines/flux/encoders.py

Lines changed: 0 additions & 3 deletions
@@ -84,19 +84,16 @@ def intermediate_outputs(self) -> List[OutputParam]:
             OutputParam(
                 "prompt_embeds",
                 type_hint=torch.Tensor,
-                # kwargs_type="guider_input_fields",
                 description="text embeddings used to guide the image generation",
             ),
             OutputParam(
                 "pooled_prompt_embeds",
                 type_hint=torch.Tensor,
-                # kwargs_type="guider_input_fields",
                 description="pooled text embeddings used to guide the image generation",
             ),
             OutputParam(
                 "text_ids",
                 type_hint=torch.Tensor,
-                # kwargs_type="guider_input_fields",
                 description="ids from the text sequence for RoPE",
             ),
         ]

src/diffusers/modular_pipelines/flux/modular_blocks.py

Lines changed: 1 addition & 3 deletions
@@ -46,9 +46,7 @@ def description(self):

 # before_denoise: all task (text2vid,)
 class FluxAutoBeforeDenoiseStep(AutoPipelineBlocks):
-    block_classes = [
-        FluxBeforeDenoiseStep
-    ]
+    block_classes = [FluxBeforeDenoiseStep]
     block_names = ["text2image"]
     block_trigger_inputs = [None]
