Commit 63e94cb

resolve conflict

1 parent 2c66fb3 commit 63e94cb

18 files changed: +504 −511 lines

docs/source/en/modular_diffusers/end_to_end_guide.md

Lines changed: 3 additions & 3 deletions

@@ -505,7 +505,7 @@ We provide a auto controlnet input block that you can directly put into your wor
 
 
 ```py
->>> from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks_presets import StableDiffusionXLAutoControlNetInputStep
+>>> from diffusers.modular_pipelines.stable_diffusion_xl.modular_blocks import StableDiffusionXLAutoControlNetInputStep
 >>> control_input_block = StableDiffusionXLAutoControlNetInputStep()
 >>> print(control_input_block)
 ```
@@ -613,7 +613,7 @@ to use
 
 You can easily share your differential diffusion workflow on the hub, by creating a modular repo like this https://huggingface.co/YiYiXu/modular-diffdiff
 
-To create a Modular Repo and share on hub, you just need to run. Note that if your pipeline contains custom block, you need to manually upload the code to the hub. But we are working on a command line tool to help you upload it very easily.
+To create a Modular Repo and share on hub, you just need to run `save_pretrained()` along with the `push_to_hub=True` flag. Note that if your pipeline contains custom block, you need to manually upload the code to the hub. But we are working on a command line tool to help you upload it very easily.
 
 ```py
 dd_pipeline.save_pretrained("YiYiXu/test_modular_doc", push_to_hub=True)
@@ -626,7 +626,7 @@ With a modular repo, it is very easy for the community to use the workflow you j
 >>> import torch
 >>> from diffusers.utils import load_image
 >>>
->>> repo_id = "YiYiXu/modular-diffdiff"
+>>> repo_id = "YiYiXu/modular-diffdiff-0704"
 >>>
 >>> components = ComponentsManager()
 >>>
docs/source/en/modular_diffusers/getting_started.md

Lines changed: 107 additions & 111 deletions
Large diffs are not rendered by default.

docs/source/en/modular_diffusers/write_own_pipeline_block.md

Lines changed: 6 additions & 6 deletions

@@ -107,7 +107,7 @@ def __call__(self, components, state):
     # You can access them like: block_state.image, block_state.processed_image
 
     # Update the pipeline state with your updated block_states
-    self.add_block_state(state, block_state)
+    self.set_block_state(state, block_state)
     return components, state
 ```
 
@@ -140,7 +140,7 @@ When you convert your blocks into a pipeline using `blocks.init_pipeline()`, the
 
 That's all you need to define in order to create a `PipelineBlock`. There is no hidden complexity. In fact we are going to create a helper function that take exactly these variables as input and return a pipeline block. We will use this helper function through out the tutorial to create test blocks
 
-Note that for `__call__` method, the only part you should implement differently is the part between `self.get_block_state()` and `self.add_block_state()`, which can be abstracted into a simple function that takes `block_state` and returns the updated state. Our helper function accepts a `block_fn` that does exactly that.
+Note that for `__call__` method, the only part you should implement differently is the part between `self.get_block_state()` and `self.set_block_state()`, which can be abstracted into a simple function that takes `block_state` and returns the updated state. Our helper function accepts a `block_fn` that does exactly that.
 
 **Helper Function**
 
@@ -172,7 +172,7 @@ def make_block(inputs=[], intermediate_inputs=[], intermediate_outputs=[], block
             block_state = self.get_block_state(state)
             if block_fn is not None:
                 block_state = block_fn(block_state, state)
-            self.add_block_state(state, block_state)
+            self.set_block_state(state, block_state)
             return components, state
 
     return TestBlock
@@ -403,7 +403,7 @@ class DenoiseLoop(PipelineBlock):
         for t in range(block_state.num_inference_steps):
             # ... loop logic here
             pass
-        self.add_block_state(state, block_state)
+        self.set_block_state(state, block_state)
         return components, state
 ```
 
@@ -455,7 +455,7 @@ class LoopWrapper(LoopSequentialPipelineBlocks):
         for i in range(block_state.num_steps):
            # loop_step executes all registered blocks in sequence
            components, block_state = self.loop_step(components, block_state, i=i)
-        self.add_block_state(state, block_state)
+        self.set_block_state(state, block_state)
        return components, state
 ```
 
@@ -464,7 +464,7 @@ class LoopWrapper(LoopSequentialPipelineBlocks):
 Loop blocks are standard `PipelineBlock`s, but their `__call__` method works differently:
 * It receives the iteration variable (e.g., `i`) passed by the loop wrapper
 * It works directly with `block_state` instead of pipeline state
-* No need to call `self.get_block_state()` or `self.add_block_state()`
+* No need to call `self.get_block_state()` or `self.set_block_state()`
 
 ```py
 class LoopBlock(PipelineBlock):
src/diffusers/__init__.py

Lines changed: 2 additions & 4 deletions

@@ -240,7 +240,6 @@
         [
             "ComponentsManager",
             "ComponentSpec",
-            "ModularLoader",
             "ModularPipeline",
             "ModularPipelineBlocks",
         ]
@@ -360,7 +359,7 @@
     _import_structure["modular_pipelines"].extend(
         [
             "StableDiffusionXLAutoBlocks",
-            "StableDiffusionXLModularLoader",
+            "StableDiffusionXLModularPipeline",
         ]
     )
     _import_structure["pipelines"].extend(
@@ -881,7 +880,6 @@
     from .modular_pipelines import (
         ComponentsManager,
         ComponentSpec,
-        ModularLoader,
         ModularPipeline,
         ModularPipelineBlocks,
     )
@@ -983,7 +981,7 @@
     else:
         from .modular_pipelines import (
             StableDiffusionXLAutoBlocks,
-            StableDiffusionXLModularLoader,
+            StableDiffusionXLModularPipeline,
         )
         from .pipelines import (
            AllegroPipeline,
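A quick import smoke test for the renamed public API; every name below appears in the updated import tables of this diff:

```py
# ModularLoader is removed; StableDiffusionXLModularLoader is now
# StableDiffusionXLModularPipeline
from diffusers import (
    ComponentsManager,
    ComponentSpec,
    ModularPipeline,
    ModularPipelineBlocks,
    StableDiffusionXLAutoBlocks,
    StableDiffusionXLModularPipeline,
)
```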

src/diffusers/modular_pipelines/__init__.py

Lines changed: 2 additions & 4 deletions

@@ -29,7 +29,6 @@
     "AutoPipelineBlocks",
     "SequentialPipelineBlocks",
     "LoopSequentialPipelineBlocks",
-    "ModularLoader",
     "PipelineState",
     "BlockState",
 ]
@@ -40,7 +39,7 @@
     "OutputParam",
     "InsertableDict",
 ]
-_import_structure["stable_diffusion_xl"] = ["StableDiffusionXLAutoBlocks", "StableDiffusionXLModularLoader"]
+_import_structure["stable_diffusion_xl"] = ["StableDiffusionXLAutoBlocks", "StableDiffusionXLModularPipeline"]
 _import_structure["components_manager"] = ["ComponentsManager"]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -55,7 +54,6 @@
         AutoPipelineBlocks,
         BlockState,
         LoopSequentialPipelineBlocks,
-        ModularLoader,
         ModularPipeline,
         ModularPipelineBlocks,
         PipelineBlock,
@@ -71,7 +69,7 @@
     )
     from .stable_diffusion_xl import (
         StableDiffusionXLAutoBlocks,
-        StableDiffusionXLModularLoader,
+        StableDiffusionXLModularPipeline,
     )
 else:
     import sys

src/diffusers/modular_pipelines/components_manager.py

Lines changed: 6 additions & 23 deletions

@@ -38,26 +38,6 @@
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
-# YiYi Notes: copied from modeling_utils.py (decide later where to put this)
-def get_memory_footprint(self, return_buffers=True):
-    r"""
-    Get the memory footprint of a model. This will return the memory footprint of the current model in bytes. Useful to
-    benchmark the memory footprint of the current model and design some tests. Solution inspired from the PyTorch
-    discussions: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822/2
-
-    Arguments:
-        return_buffers (`bool`, *optional*, defaults to `True`):
-            Whether to return the size of the buffer tensors in the computation of the memory footprint. Buffers are
-            tensors that do not require gradients and not registered as parameters. E.g. mean and std in batch norm
-            layers. Please see: https://discuss.pytorch.org/t/what-pytorch-means-by-buffers/120266/2
-    """
-    mem = sum([param.nelement() * param.element_size() for param in self.parameters()])
-    if return_buffers:
-        mem_bufs = sum([buf.nelement() * buf.element_size() for buf in self.buffers()])
-        mem = mem + mem_bufs
-    return mem
-
-
 class CustomOffloadHook(ModelHook):
     """
     A hook that offloads a model on the CPU until its forward pass is called. It ensures the model and its inputs are
@@ -170,14 +150,16 @@ class AutoOffloadStrategy:
     the available memory on the device.
     """
 
+    # YiYi TODO: instead of memory_reserve_margin, we should let user set the maximum_total_models_size to keep on device
+    # the actual memory usage would be higher. But it's simpler this way, and can be tested
     def __init__(self, memory_reserve_margin="3GB"):
         self.memory_reserve_margin = convert_file_size_to_int(memory_reserve_margin)
 
     def __call__(self, hooks, model_id, model, execution_device):
         if len(hooks) == 0:
             return []
 
-        current_module_size = get_memory_footprint(model)
+        current_module_size = model.get_memory_footprint()
 
         mem_on_device = torch.cuda.mem_get_info(execution_device.index)[0]
         mem_on_device = mem_on_device - self.memory_reserve_margin
@@ -190,12 +172,13 @@ def __call__(self, hooks, model_id, model, execution_device):
         # exlucde models that's not currently loaded on the device
         module_sizes = dict(
             sorted(
-                {hook.model_id: get_memory_footprint(hook.model) for hook in hooks}.items(),
+                {hook.model_id: hook.model.get_memory_footprint() for hook in hooks}.items(),
                 key=lambda x: x[1],
                 reverse=True,
             )
         )
 
+        # YiYi/Dhruv TODO: sort smallest to largest, and offload in that order we would tend to keep the larger models on GPU more often
        def search_best_candidate(module_sizes, min_memory_offload):
            """
            search the optimal combination of models to offload to cpu, given a dictionary of module sizes and a
@@ -652,7 +635,7 @@ def get_model_info(
         info.update(
             {
                 "class_name": component.__class__.__name__,
-                "size_gb": get_memory_footprint(component) / (1024**3),
+                "size_gb": component.get_memory_footprint() / (1024**3),
                 "adapters": None,  # Default to None
                 "has_hook": has_hook,
                 "execution_device": execution_device,
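The deleted module-level helper duplicated logic that already exists as a method on the models themselves, which is what the code now calls (`model.get_memory_footprint()`). For reference, the computation the removed helper performed, rewritten as a standalone sketch:

```py
import torch.nn as nn


def memory_footprint(model: nn.Module, return_buffers: bool = True) -> int:
    """Return the model's memory footprint in bytes (same math as the removed helper)."""
    # Sum the bytes held by all parameters
    mem = sum(p.nelement() * p.element_size() for p in model.parameters())
    if return_buffers:
        # Buffers are non-parameter tensors, e.g. batch-norm running mean/std
        mem += sum(b.nelement() * b.element_size() for b in model.buffers())
    return mem
```

Dividing the result by `1024**3` gives the size in GiB, matching the `size_gb` entry in `get_model_info` above.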