
Commit 4aebdb4

Merge pull request #462 from DefTruth/main
[Parallel] Avoid OOM while batch size > 1
2 parents 3710a61 + bb69713 commit 4aebdb4

File tree

1 file changed: +4 −1 lines changed


tools/parallel_inference/parallel_inference_xdit.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -61,11 +61,14 @@ def main():
     )
     if args.enable_sequential_cpu_offload:
         pipe.enable_model_cpu_offload(gpu_id=local_rank)
-        pipe.vae.enable_tiling()
     else:
         device = torch.device(f"cuda:{local_rank}")
         pipe = pipe.to(device)
 
+    # Always enable tiling and slicing to avoid VAE OOM while batch size > 1
+    pipe.vae.enable_slicing()
+    pipe.vae.enable_tiling()
+
     torch.cuda.reset_peak_memory_stats()
     start_time = time.time()
```
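The intent of the patch is that VAE slicing and tiling are now enabled on both code paths (CPU offload or direct GPU placement), rather than tiling being enabled only under sequential CPU offload. A minimal sketch of that control flow is below; the `FakePipe`/`FakeVAE` classes are hypothetical stand-ins for the real xDiT/diffusers pipeline, though `enable_slicing()` and `enable_tiling()` mirror the actual `diffusers` VAE methods (slicing decodes a batch one sample at a time, tiling decodes each latent in spatial tiles, both trading a little speed for lower peak memory):

```python
# Sketch of the patched configuration logic. FakePipe/FakeVAE are
# hypothetical stand-ins for the diffusers pipeline used in the script.

class FakeVAE:
    def __init__(self):
        self.slicing = False
        self.tiling = False

    def enable_slicing(self):
        # Decode the batch one sample at a time to cap peak VAE memory.
        self.slicing = True

    def enable_tiling(self):
        # Decode each latent in spatial tiles instead of all at once.
        self.tiling = True


class FakePipe:
    def __init__(self):
        self.vae = FakeVAE()
        self.offloaded = False

    def enable_model_cpu_offload(self, gpu_id=0):
        # Stand-in for moving submodules to CPU and paging them to this GPU.
        self.offloaded = True


def configure(pipe, enable_sequential_cpu_offload, local_rank=0):
    if enable_sequential_cpu_offload:
        pipe.enable_model_cpu_offload(gpu_id=local_rank)
    # else: in the real script, pipe is moved to f"cuda:{local_rank}" here

    # Patched behaviour: slicing and tiling are enabled on BOTH paths,
    # so batch_size > 1 no longer OOMs during VAE decode.
    pipe.vae.enable_slicing()
    pipe.vae.enable_tiling()
    return pipe
```

Before the patch, a run without `--enable_sequential_cpu_offload` got neither optimization, which is where the batch-size-dependent OOM came from.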
