From f5b3429b963bea0aa3f5eb54c35651b1b85020a8 Mon Sep 17 00:00:00 2001
From: chengzeyi <ichengzeyi@gmail.com>
Date: Tue, 5 Aug 2025 09:02:12 +0000
Subject: [PATCH] optimize QwenImagePipeline to reduce unnecessary CUDA
 synchronization

---
 src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py
index 68635782f1fe..772909c21526 100644
--- a/src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py
+++ b/src/diffusers/pipelines/qwenimage/pipeline_qwenimage.py
@@ -635,6 +635,11 @@ def __call__(
         if self.attention_kwargs is None:
             self._attention_kwargs = {}
 
+        txt_seq_lens = prompt_embeds_mask.sum(dim=1).tolist() if prompt_embeds_mask is not None else None
+        negative_txt_seq_lens = (
+            negative_prompt_embeds_mask.sum(dim=1).tolist() if negative_prompt_embeds_mask is not None else None
+        )
+
         # 6. Denoising loop
         self.scheduler.set_begin_index(0)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -653,7 +658,7 @@ def __call__(
                         encoder_hidden_states_mask=prompt_embeds_mask,
                         encoder_hidden_states=prompt_embeds,
                         img_shapes=img_shapes,
-                        txt_seq_lens=prompt_embeds_mask.sum(dim=1).tolist(),
+                        txt_seq_lens=txt_seq_lens,
                         attention_kwargs=self.attention_kwargs,
                         return_dict=False,
                     )[0]
@@ -667,7 +672,7 @@ def __call__(
                             encoder_hidden_states_mask=negative_prompt_embeds_mask,
                             encoder_hidden_states=negative_prompt_embeds,
                             img_shapes=img_shapes,
-                            txt_seq_lens=negative_prompt_embeds_mask.sum(dim=1).tolist(),
+                            txt_seq_lens=negative_txt_seq_lens,
                             attention_kwargs=self.attention_kwargs,
                             return_dict=False,
                         )[0]