Skip to content

Commit 69d804b

Browse files
committed
add copy remarks + fix i2v
1 parent 6f2f9be commit 69d804b

File tree

4 files changed

+112
-13
lines changed

4 files changed

+112
-13
lines changed

src/diffusers/pipelines/kandinsky5/pipeline_kandinsky.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,26 +89,39 @@
8989

9090

9191
def basic_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Run `ftfy.fix_text` on the text when ftfy is available, unescape HTML entities, and strip both ends.
    """
    fixed = ftfy.fix_text(text) if is_ftfy_available() else text
    # Unescape twice so that doubly-escaped entities (e.g. "&amp;lt;") are fully decoded.
    unescaped_once = html.unescape(fixed)
    return html.unescape(unescaped_once).strip()
97101

98102

99103
def whitespace_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Collapse every run of whitespace (spaces, tabs, newlines) into a single space and strip both ends.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
104112

105113

106114
def prompt_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Clean a prompt: first `basic_clean`, then `whitespace_clean` on its result.
    """
    cleaned = basic_clean(text)
    return whitespace_clean(cleaned)
110122

111123

124+
112125
class Kandinsky5T2VPipeline(DiffusionPipeline, KandinskyLoraLoaderMixin):
113126
r"""
114127
Pipeline for text-to-video generation using Kandinsky 5.0.

src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2i.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,22 +87,34 @@
8787

8888

8989
def basic_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Run `ftfy.fix_text` on the text when ftfy is available, unescape HTML entities, and strip both ends.
    """
    fixed = ftfy.fix_text(text) if is_ftfy_available() else text
    # Unescape twice so that doubly-escaped entities (e.g. "&amp;lt;") are fully decoded.
    unescaped_once = html.unescape(fixed)
    return html.unescape(unescaped_once).strip()
9599

96100

97101
def whitespace_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Collapse every run of whitespace (spaces, tabs, newlines) into a single space and strip both ends.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
102110

103111

104112
def prompt_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Clean a prompt: first `basic_clean`, then `whitespace_clean` on its result.
    """
    cleaned = basic_clean(text)
    return whitespace_clean(cleaned)
108120

src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_i2v.py

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,22 +94,34 @@
9494

9595

9696
def basic_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Run `ftfy.fix_text` on the text when ftfy is available, unescape HTML entities, and strip both ends.
    """
    fixed = ftfy.fix_text(text) if is_ftfy_available() else text
    # Unescape twice so that doubly-escaped entities (e.g. "&amp;lt;") are fully decoded.
    unescaped_once = html.unescape(fixed)
    return html.unescape(unescaped_once).strip()
102106

103107

104108
def whitespace_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Collapse every run of whitespace (spaces, tabs, newlines) into a single space and strip both ends.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
109117

110118

111119
def prompt_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Clean a prompt: first `basic_clean`, then `whitespace_clean` on its result.
    """
    cleaned = basic_clean(text)
    return whitespace_clean(cleaned)
115127

@@ -396,6 +408,53 @@ def _encode_prompt_clip(
396408
pooled_embed = self.text_encoder_2(**inputs)["pooler_output"]
397409

398410
return pooled_embed.to(dtype)
411+
412+
@staticmethod
413+
def adaptive_mean_std_normalization(source, reference):
    """
    Shift and scale `source` so its per-sample statistics move towards those of `reference`.

    The mean and standard deviation of `source` are computed per sample over dims 1-4, so `source` must be a
    5-D tensor. The target mean/std are the global mean/std of `reference`, clamped to a narrow window around
    the statistics of `source` so that the latents cannot change by more than a bounded amount.

    Args:
        source: 5-D tensor to normalize (dim 0 is treated as the batch dimension).
        reference: Tensor whose overall mean and standard deviation provide the target statistics.

    Returns:
        A new tensor with the same shape as `source`; `source` itself is not modified.
    """
    reduce_dims = (1, 2, 3, 4)
    source_mean = source.mean(dim=reduce_dims, keepdim=True)
    source_std = source.std(dim=reduce_dims, keepdim=True)

    # Magic constants: how far the target statistics may drift from the source statistics.
    clamp_mean_low = 0.05
    clamp_mean_high = 0.1
    clamp_std_low = 0.1
    clamp_std_high = 0.25

    target_mean = torch.clamp(reference.mean(), source_mean - clamp_mean_low, source_mean + clamp_mean_high)
    target_std = torch.clamp(reference.std(), source_std - clamp_std_low, source_std + clamp_std_high)

    # Guard the division: a constant `source` has zero std, which would otherwise produce NaN (0 / 0).
    safe_std = source_std.clamp_min(torch.finfo(source.dtype).eps)

    standardized = (source - source_mean) / safe_std
    return standardized * target_std + target_mean
430+
431+
def normalize_first_frame(self, latents, reference_frames=5, clump_values=False):
    """
    Re-normalize the leading latent frames against the statistics of the frames that follow them.

    The first 4 entries along dim 1 are passed through `self.adaptive_mean_std_normalization`, using up to
    `reference_frames` entries immediately after them as the reference.

    Args:
        latents: Tensor whose dim 1 indexes frames (presumably `(batch, frames, height, width, channels)`;
            confirm against the caller). It is never modified in place.
        reference_frames: Maximum number of frames after the leading ones that are used as the reference.
        clump_values: If True, additionally clamp the normalized frames to the min/max of the reference.

    Returns:
        A tensor with the same shape as `latents`. When there are no frames after the leading ones to serve
        as a reference, `latents` is returned unchanged.
    """
    num_first_frames = 4
    reference = latents[:, num_first_frames : num_first_frames + reference_frames]

    # Without reference frames the statistics would be NaN and would poison every frame; always return a
    # tensor (never a tuple) because the caller assigns the result straight back to `latents`.
    if reference.shape[1] == 0:
        return latents

    first_frames = latents[:, :num_first_frames]
    normalized_first = self.adaptive_mean_std_normalization(first_frames, reference)
    if clump_values:
        normalized_first = torch.clamp(normalized_first, reference.min(), reference.max())

    # Work on a copy so the caller's tensor is left untouched.
    result = latents.clone()
    result[:, :num_first_frames] = normalized_first
    return result
399458

400459
def encode_prompt(
401460
self,
@@ -973,8 +1032,11 @@ def __call__(
9731032

9741033
# 9. Post-processing - extract main latents
9751034
latents = latents[:, :, :, :, :num_channels_latents]
1035+
1036+
# 10. fix mesh artifacts
1037+
latents = self.normalize_first_frame(latents)
9761038

977-
# 10. Decode latents to video
1039+
# 11. Decode latents to video
9781040
if output_type != "latent":
9791041
latents = latents.to(self.vae.dtype)
9801042
# Reshape and normalize latents

src/diffusers/pipelines/kandinsky5/pipeline_kandinsky_t2i.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,22 +83,34 @@
8383

8484

8585
def basic_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Run `ftfy.fix_text` on the text when ftfy is available, unescape HTML entities, and strip both ends.
    """
    fixed = ftfy.fix_text(text) if is_ftfy_available() else text
    # Unescape twice so that doubly-escaped entities (e.g. "&amp;lt;") are fully decoded.
    unescaped_once = html.unescape(fixed)
    return html.unescape(unescaped_once).strip()
9195

9296

9397
def whitespace_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Collapse every run of whitespace (spaces, tabs, newlines) into a single space and strip both ends.
    """
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
98106

99107

100108
def prompt_clean(text):
    """
    Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/wan/pipeline_wan.py

    Clean a prompt: first `basic_clean`, then `whitespace_clean` on its result.
    """
    cleaned = basic_clean(text)
    return whitespace_clean(cleaned)
104116

0 commit comments

Comments
 (0)