Skip to content

Commit b4af50d

Browse files
committed
Fix the tokenizer.padding_side='right' bug
1 parent b3f7212 commit b4af50d

File tree

1 file changed

+5 -3 lines changed

1 file changed

+5
-3
lines changed

src/diffusers/pipelines/sana/pipeline_sana.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ def encode_prompt(
165165
negative_prompt_attention_mask: Optional[torch.Tensor] = None,
166166
clean_caption: bool = False,
167167
max_sequence_length: int = 300,
168-
complex_human_instruction=None,
168+
complex_human_instruction: Optional[List[str]] = None,
169169
):
170170
r"""
171171
Encodes the prompt into text encoder hidden states.
@@ -206,6 +206,8 @@ def encode_prompt(
206206
else:
207207
batch_size = prompt_embeds.shape[0]
208208

209+
self.tokenizer.padding_side = "right"
210+
209211
# See Section 3.1. of the paper.
210212
max_length = max_sequence_length
211213
select_index = [0] + list(range(-max_length + 1, 0))
@@ -325,7 +327,7 @@ def check_inputs(
325327
negative_prompt_attention_mask=None,
326328
):
327329
if height % 32 != 0 or width % 32 != 0:
328-
raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
330+
raise ValueError(f"`height` and `width` have to be divisible by 32 but are {height} and {width}.")
329331

330332
if callback_on_step_end_tensor_inputs is not None and not all(
331333
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
@@ -581,7 +583,7 @@ def __call__(
581583
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
582584
max_sequence_length: int = 300,
583585
complex_human_instruction: list[str] = [
584-
'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:',
586+
"Given a user prompt, generate an 'Enhanced prompt' that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:",
585587
"- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.",
586588
"- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.",
587589
"Here are examples of how to transform or refine prompts:",

0 commit comments

Comments
 (0)