Commit 63e94cb

resolve conflict

1 parent 2c66fb3 commit 63e94cb

18 files changed: +504 −511 lines

docs/source/en/modular_diffusers/end_to_end_guide.md

Lines changed: 3 additions & 3 deletions

@@ -505,7 +505,7 @@ We provide a auto controlnet input block that you can directly put into your wor
 
 
 ```py
->>> from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks_presets import StableDiffusionXLAutoControlNetInputStep
+>>> from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep
 >>> control_input_block = StableDiffusionXLAutoControlNetInputStep()
 >>> print(control_input_block)
 ```
@@ -613,7 +613,7 @@ to use
 
 You can easily share your differential diffusion workflow on the hub, by creating a modular repo like this https://huggingface.co/YiYiXu/modular-diffdiff
 
-To create a Modular Repo and share on hub, you just need to run. Note that if your pipeline contains custom block, you need to manually upload the code to the hub. But we are working on a command line tool to help you upload it very easily.
+To create a Modular Repo and share on hub, you just need to run `save_pretrained()` along with the `push_to_hub=True` flag. Note that if your pipeline contains custom block, you need to manually upload the code to the hub. But we are working on a command line tool to help you upload it very easily.
 
 ```py
 dd_pipeline.save_pretrained("YiYiXu/test_modular_doc", push_to_hub=True)
@@ -626,7 +626,7 @@ With a modular repo, it is very easy for the community to use the workflow you j
 >>> import torch
 >>> from diffusers.utils import load_image
 >>>
->>> repo_id = "YiYiXu/modular-diffdiff"
+>>> repo_id = "YiYiXu/modular-diffdiff-0704"
 >>>
 >>> components = ComponentsManager()
 >>>
docs/source/en/modular_diffusers/getting_started.md

Lines changed: 107 additions & 111 deletions
Large diffs are not rendered by default.

docs/source/en/modular_diffusers/write_own_pipeline_block.md

Lines changed: 6 additions & 6 deletions

@@ -107,7 +107,7 @@ def __call__(self, components, state):
     # You can access them like: block_state.image, block_state.processed_image
 
     # Update the pipeline state with your updated block_states
-    self.add_block_state(state, block_state)
+    self.set_block_state(state, block_state)
     return components, state
 ```
 
@@ -140,7 +140,7 @@ When you convert your blocks into a pipeline using `blocks.init_pipeline()`, the
 
 That's all you need to define in order to create a `PipelineBlock`. There is no hidden complexity. In fact we are going to create a helper function that take exactly these variables as input and return a pipeline block. We will use this helper function through out the tutorial to create test blocks
 
-Note that for `__call__` method, the only part you should implement differently is the part between `self.get_block_state()` and `self.add_block_state()`, which can be abstracted into a simple function that takes `block_state` and returns the updated state. Our helper function accepts a `block_fn` that does exactly that.
+Note that for `__call__` method, the only part you should implement differently is the part between `self.get_block_state()` and `self.set_block_state()`, which can be abstracted into a simple function that takes `block_state` and returns the updated state. Our helper function accepts a `block_fn` that does exactly that.
 
 **Helper Function**
 
@@ -172,7 +172,7 @@ def make_block(inputs=[], intermediate_inputs=[], intermediate_outputs=[], block
             block_state = self.get_block_state(state)
             if block_fn is not None:
                 block_state = block_fn(block_state, state)
-            self.add_block_state(state, block_state)
+            self.set_block_state(state, block_state)
             return components, state
 
     return TestBlock
@@ -403,7 +403,7 @@ class DenoiseLoop(PipelineBlock):
         for t in range(block_state.num_inference_steps):
             # ... loop logic here
             pass
-        self.add_block_state(state, block_state)
+        self.set_block_state(state, block_state)
         return components, state
 ```
 
@@ -455,7 +455,7 @@ class LoopWrapper(LoopSequentialPipelineBlocks):
         for i in range(block_state.num_steps):
            # loop_step executes all registered blocks in sequence
            components, block_state = self.loop_step(components, block_state, i=i)
-        self.add_block_state(state, block_state)
+        self.set_block_state(state, block_state)
        return components, state
 ```
 
@@ -464,7 +464,7 @@ class LoopWrapper(LoopSequentialPipelineBlocks):
 Loop blocks are standard `PipelineBlock`s, but their `__call__` method works differently:
 * It receives the iteration variable (e.g., `i`) passed by the loop wrapper
 * It works directly with `block_state` instead of pipeline state
-* No need to call `self.get_block_state()` or `self.add_block_state()`
+* No need to call `self.get_block_state()` or `self.set_block_state()`
 
 ```py
 class LoopBlock(PipelineBlock):
src/diffusers/__init__.py

Lines changed: 2 additions & 4 deletions

@@ -240,7 +240,6 @@
         [
             "ComponentsManager",
             "ComponentSpec",
-            "ModularLoader",
             "ModularPipeline",
             "ModularPipelineBlocks",
         ]
@@ -360,7 +359,7 @@
     _import_structure["modular_pipelines"].extend(
         [
             "StableDiffusionXLAutoBlocks",
-            "StableDiffusionXLModularLoader",
+            "StableDiffusionXLModularPipeline",
         ]
     )
     _import_structure["pipelines"].extend(
@@ -881,7 +880,6 @@
     from .modular_pipelines import (
         ComponentsManager,
         ComponentSpec,
-        ModularLoader,
         ModularPipeline,
         ModularPipelineBlocks,
     )
@@ -983,7 +981,7 @@
     else:
         from .modular_pipelines import (
             StableDiffusionXLAutoBlocks,
-            StableDiffusionXLModularLoader,
+            StableDiffusionXLModularPipeline,
         )
         from .pipelines import (
            AllegroPipeline,
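A quick import smoke test for the renamed public API; every name below appears in the updated import tables of this diff:

```py
# ModularLoader is removed; StableDiffusionXLModularLoader is now
# StableDiffusionXLModularPipeline
from diffusers import (
    ComponentsManager,
    ComponentSpec,
    ModularPipeline,
    ModularPipelineBlocks,
    StableDiffusionXLAutoBlocks,
    StableDiffusionXLModularPipeline,
)
```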

src/diffusers/modular_pipelines/__init__.py

Lines changed: 2 additions & 4 deletions

@@ -29,7 +29,6 @@
     "AutoPipelineBlocks",
     "SequentialPipelineBlocks",
     "LoopSequentialPipelineBlocks",
-    "ModularLoader",
     "PipelineState",
     "BlockState",
 ]
@@ -40,7 +39,7 @@
     "OutputParam",
     "InsertableDict",
 ]
-_import_structure["stable_diffusion_xl"] = ["StableDiffusionXLAutoBlocks", "StableDiffusionXLModularLoader"]
+_import_structure["stable_diffusion_xl"] = ["StableDiffusionXLAutoBlocks", "StableDiffusionXLModularPipeline"]
 _import_structure["components_manager"] = ["ComponentsManager"]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -55,7 +54,6 @@
         AutoPipelineBlocks,
         BlockState,
         LoopSequentialPipelineBlocks,
-        ModularLoader,
         ModularPipeline,
         ModularPipelineBlocks,
         PipelineBlock,
@@ -71,7 +69,7 @@
     )
     from .stable_diffusion_xl import (
         StableDiffusionXLAutoBlocks,
-        StableDiffusionXLModularLoader,
+        StableDiffusionXLModularPipeline,
     )
 else:
     import sys

src/diffusers/modular_pipelines/components_manager.py

Lines changed: 6 additions & 23 deletions

@@ -38,26 +38,6 @@
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
-# YiYi Notes: copied from modeling_utils.py (decide later where to put this)
-def get_memory_footprint(self, return_buffers=True):
-    r"""
-    Get the memory footprint of a model. This will return the memory footprint of the current model in bytes. Useful to
-    benchmark the memory footprint of the current model and design some tests. Solution inspired from the PyTorch
-    discussions: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822/2
-
-    Arguments:
-        return_buffers (`bool`, *optional*, defaults to `True`):
-            Whether to return the size of the buffer tensors in the computation of the memory footprint. Buffers are
-            tensors that do not require gradients and not registered as parameters. E.g. mean and std in batch norm
-            layers. Please see: https://discuss.pytorch.org/t/what-pytorch-means-by-buffers/120266/2
-    """
-    mem = sum([param.nelement() * param.element_size() for param in self.parameters()])
-    if return_buffers:
-        mem_bufs = sum([buf.nelement() * buf.element_size() for buf in self.buffers()])
-        mem = mem + mem_bufs
-    return mem
-
-
 class CustomOffloadHook(ModelHook):
     """
     A hook that offloads a model on the CPU until its forward pass is called. It ensures the model and its inputs are
@@ -170,14 +150,16 @@ class AutoOffloadStrategy:
     the available memory on the device.
     """
 
+    # YiYi TODO: instead of memory_reserve_margin, we should let user set the maximum_total_models_size to keep on device
+    # the actual memory usage would be higher. But it's simpler this way, and can be tested
     def __init__(self, memory_reserve_margin="3GB"):
         self.memory_reserve_margin = convert_file_size_to_int(memory_reserve_margin)
 
     def __call__(self, hooks, model_id, model, execution_device):
         if len(hooks) == 0:
             return []
 
-        current_module_size = get_memory_footprint(model)
+        current_module_size = model.get_memory_footprint()
 
         mem_on_device = torch.cuda.mem_get_info(execution_device.index)[0]
         mem_on_device = mem_on_device - self.memory_reserve_margin
@@ -190,12 +172,13 @@ def __call__(self, hooks, model_id, model, execution_device):
         # exlucde models that's not currently loaded on the device
         module_sizes = dict(
             sorted(
-                {hook.model_id: get_memory_footprint(hook.model) for hook in hooks}.items(),
+                {hook.model_id: hook.model.get_memory_footprint() for hook in hooks}.items(),
                 key=lambda x: x[1],
                 reverse=True,
             )
         )
 
+        # YiYi/Dhruv TODO: sort smallest to largest, and offload in that order we would tend to keep the larger models on GPU more often
        def search_best_candidate(module_sizes, min_memory_offload):
            """
            search the optimal combination of models to offload to cpu, given a dictionary of module sizes and a
@@ -652,7 +635,7 @@ def get_model_info(
         info.update(
             {
                 "class_name": component.__class__.__name__,
-                "size_gb": get_memory_footprint(component) / (1024**3),
+                "size_gb": component.get_memory_footprint() / (1024**3),
                 "adapters": None,  # Default to None
                 "has_hook": has_hook,
                 "execution_device": execution_device,
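The deleted module-level helper duplicated logic that already exists as a method on the models themselves, which is what the code now calls (`model.get_memory_footprint()`). For reference, the computation the removed helper performed, rewritten as a standalone sketch:

```py
import torch.nn as nn


def memory_footprint(model: nn.Module, return_buffers: bool = True) -> int:
    """Return the model's memory footprint in bytes (same math as the removed helper)."""
    # Sum the bytes held by all parameters
    mem = sum(p.nelement() * p.element_size() for p in model.parameters())
    if return_buffers:
        # Buffers are non-parameter tensors, e.g. batch-norm running mean/std
        mem += sum(b.nelement() * b.element_size() for b in model.buffers())
    return mem
```

Dividing the result by `1024**3` gives the size in GiB, matching the `size_gb` entry in `get_model_info` above.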