Skip to content

Commit a0acbdc

Browse files
bghira, a-r-r-o-w, and sayakpaul
authored
fix for huggingface#7365, prevent pipelines from overriding provided prompt embeds (huggingface#7926)
* fix for huggingface#7365, prevent pipelines from overriding provided prompt embeds
* fix-copies
* fix implementation
* update

---------

Co-authored-by: bghira <[email protected]>
Co-authored-by: Aryan <[email protected]>
Co-authored-by: sayakpaul <[email protected]>
1 parent 5655b22 commit a0acbdc

27 files changed

+154
-54
lines changed

examples/community/lpw_stable_diffusion_xl.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -827,7 +827,9 @@ def encode_prompt(
827827
)
828828

829829
# We are only ALWAYS interested in the pooled output of the final text encoder
830-
pooled_prompt_embeds = prompt_embeds[0]
830+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
831+
pooled_prompt_embeds = prompt_embeds[0]
832+
831833
prompt_embeds = prompt_embeds.hidden_states[-2]
832834

833835
prompt_embeds_list.append(prompt_embeds)
@@ -879,7 +881,8 @@ def encode_prompt(
879881
output_hidden_states=True,
880882
)
881883
# We are only ALWAYS interested in the pooled output of the final text encoder
882-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
884+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
885+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
883886
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
884887

885888
negative_prompt_embeds_list.append(negative_prompt_embeds)

examples/community/pipeline_demofusion_sdxl.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,9 @@ def encode_prompt(
290290
)
291291

292292
# We are only ALWAYS interested in the pooled output of the final text encoder
293-
pooled_prompt_embeds = prompt_embeds[0]
293+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
294+
pooled_prompt_embeds = prompt_embeds[0]
295+
294296
prompt_embeds = prompt_embeds.hidden_states[-2]
295297

296298
prompt_embeds_list.append(prompt_embeds)
@@ -342,7 +344,8 @@ def encode_prompt(
342344
output_hidden_states=True,
343345
)
344346
# We are only ALWAYS interested in the pooled output of the final text encoder
345-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
347+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
348+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
346349
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
347350

348351
negative_prompt_embeds_list.append(negative_prompt_embeds)

examples/community/pipeline_sdxl_style_aligned.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -628,7 +628,9 @@ def encode_prompt(
628628
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
629629

630630
# We are only ALWAYS interested in the pooled output of the final text encoder
631-
pooled_prompt_embeds = prompt_embeds[0]
631+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
632+
pooled_prompt_embeds = prompt_embeds[0]
633+
632634
if clip_skip is None:
633635
prompt_embeds = prompt_embeds.hidden_states[-2]
634636
else:
@@ -688,7 +690,8 @@ def encode_prompt(
688690
output_hidden_states=True,
689691
)
690692
# We are only ALWAYS interested in the pooled output of the final text encoder
691-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
693+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
694+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
692695
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
693696

694697
negative_prompt_embeds_list.append(negative_prompt_embeds)

examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -359,7 +359,9 @@ def encode_prompt(
359359
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
360360

361361
# We are only ALWAYS interested in the pooled output of the final text encoder
362-
pooled_prompt_embeds = prompt_embeds[0]
362+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
363+
pooled_prompt_embeds = prompt_embeds[0]
364+
363365
if clip_skip is None:
364366
prompt_embeds = prompt_embeds.hidden_states[-2]
365367
else:
@@ -419,7 +421,8 @@ def encode_prompt(
419421
output_hidden_states=True,
420422
)
421423
# We are only ALWAYS interested in the pooled output of the final text encoder
422-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
424+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
425+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
423426
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
424427

425428
negative_prompt_embeds_list.append(negative_prompt_embeds)

examples/community/pipeline_stable_diffusion_xl_controlnet_adapter_inpaint.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -507,7 +507,9 @@ def encode_prompt(
507507
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
508508

509509
# We are only ALWAYS interested in the pooled output of the final text encoder
510-
pooled_prompt_embeds = prompt_embeds[0]
510+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
511+
pooled_prompt_embeds = prompt_embeds[0]
512+
511513
if clip_skip is None:
512514
prompt_embeds = prompt_embeds.hidden_states[-2]
513515
else:
@@ -567,7 +569,8 @@ def encode_prompt(
567569
output_hidden_states=True,
568570
)
569571
# We are only ALWAYS interested in the pooled output of the final text encoder
570-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
572+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
573+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
571574
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
572575

573576
negative_prompt_embeds_list.append(negative_prompt_embeds)

examples/community/pipeline_stable_diffusion_xl_differential_img2img.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -394,7 +394,9 @@ def encode_prompt(
394394
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
395395

396396
# We are only ALWAYS interested in the pooled output of the final text encoder
397-
pooled_prompt_embeds = prompt_embeds[0]
397+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
398+
pooled_prompt_embeds = prompt_embeds[0]
399+
398400
if clip_skip is None:
399401
prompt_embeds = prompt_embeds.hidden_states[-2]
400402
else:
@@ -454,7 +456,8 @@ def encode_prompt(
454456
output_hidden_states=True,
455457
)
456458
# We are only ALWAYS interested in the pooled output of the final text encoder
457-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
459+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
460+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
458461
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
459462

460463
negative_prompt_embeds_list.append(negative_prompt_embeds)

examples/community/pipeline_stable_diffusion_xl_ipex.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -390,7 +390,9 @@ def encode_prompt(
390390
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
391391

392392
# We are only ALWAYS interested in the pooled output of the final text encoder
393-
pooled_prompt_embeds = prompt_embeds[0]
393+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
394+
pooled_prompt_embeds = prompt_embeds[0]
395+
394396
if clip_skip is None:
395397
prompt_embeds = prompt_embeds.hidden_states[-2]
396398
else:
@@ -450,7 +452,8 @@ def encode_prompt(
450452
output_hidden_states=True,
451453
)
452454
# We are only ALWAYS interested in the pooled output of the final text encoder
453-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
455+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
456+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
454457
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
455458

456459
negative_prompt_embeds_list.append(negative_prompt_embeds)

src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -438,7 +438,9 @@ def encode_prompt(
438438
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
439439

440440
# We are only ALWAYS interested in the pooled output of the final text encoder
441-
pooled_prompt_embeds = prompt_embeds[0]
441+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
442+
pooled_prompt_embeds = prompt_embeds[0]
443+
442444
if clip_skip is None:
443445
prompt_embeds = prompt_embeds.hidden_states[-2]
444446
else:
@@ -497,8 +499,10 @@ def encode_prompt(
497499
uncond_input.input_ids.to(device),
498500
output_hidden_states=True,
499501
)
502+
500503
# We are only ALWAYS interested in the pooled output of the final text encoder
501-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
504+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
505+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
502506
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
503507

504508
negative_prompt_embeds_list.append(negative_prompt_embeds)

src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -406,7 +406,9 @@ def encode_prompt(
406406
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
407407

408408
# We are only ALWAYS interested in the pooled output of the final text encoder
409-
pooled_prompt_embeds = prompt_embeds[0]
409+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
410+
pooled_prompt_embeds = prompt_embeds[0]
411+
410412
if clip_skip is None:
411413
prompt_embeds = prompt_embeds.hidden_states[-2]
412414
else:
@@ -465,8 +467,10 @@ def encode_prompt(
465467
uncond_input.input_ids.to(device),
466468
output_hidden_states=True,
467469
)
470+
468471
# We are only ALWAYS interested in the pooled output of the final text encoder
469-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
472+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
473+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
470474
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
471475

472476
negative_prompt_embeds_list.append(negative_prompt_embeds)

src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,9 @@ def encode_prompt(
415415
prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
416416

417417
# We are only ALWAYS interested in the pooled output of the final text encoder
418-
pooled_prompt_embeds = prompt_embeds[0]
418+
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
419+
pooled_prompt_embeds = prompt_embeds[0]
420+
419421
if clip_skip is None:
420422
prompt_embeds = prompt_embeds.hidden_states[-2]
421423
else:
@@ -474,8 +476,10 @@ def encode_prompt(
474476
uncond_input.input_ids.to(device),
475477
output_hidden_states=True,
476478
)
479+
477480
# We are only ALWAYS interested in the pooled output of the final text encoder
478-
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
481+
if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
482+
negative_pooled_prompt_embeds = negative_prompt_embeds[0]
479483
negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
480484

481485
negative_prompt_embeds_list.append(negative_prompt_embeds)

0 commit comments

Comments
 (0)