Skip to content

Commit 4b25130

Browse files
charliewwdevclaude
and committed
fix portrait selection: score candidates to avoid NaN-corrupted frames
The T2V portrait generation occasionally produces frames with VAE decode corruption (NaN artifacts). Previously the script always picked candidate_0 regardless of quality. Now scores all candidates using a combined metric (row smoothness + inter-channel edge correlation + channel correlation) and selects the highest-scoring one. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5a88d7f commit 4b25130

File tree

1 file changed

+43
-9
lines changed

1 file changed

+43
-9
lines changed

scripts/produce_trailer_v2.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,39 @@
5555
BGM_FILENAME = "PeriTune-Wuxia3.mp3"
5656

5757

58+
def _score_portrait(img):
59+
"""Score portrait quality (higher = better). Detects NaN/VAE corruption.
60+
61+
Uses multiple heuristics since VAE corruption can be statistically subtle:
62+
1. Row-to-row pixel jumps (banding artifacts)
63+
2. Inter-channel edge correlation (color fringing detection)
64+
3. Channel correlation (RGB decorrelation from NaN)
65+
"""
66+
import numpy as np
67+
arr = np.array(img, dtype=np.float32)
68+
69+
# 1. Row smoothness (lower row_jumps = smoother = better)
70+
row_jumps = np.abs(arr[1:] - arr[:-1]).mean()
71+
72+
# 2. Inter-channel Laplacian correlation (edges should be aligned across RGB)
73+
laps = []
74+
for c in range(3):
75+
ch = arr[:, :, c]
76+
lap = ch[2:, 1:-1] + ch[:-2, 1:-1] + ch[1:-1, 2:] + ch[1:-1, :-2] - 4 * ch[1:-1, 1:-1]
77+
laps.append(lap.flatten())
78+
rg_corr = np.corrcoef(laps[0], laps[1])[0, 1]
79+
rb_corr = np.corrcoef(laps[0], laps[2])[0, 1]
80+
edge_corr = (rg_corr + rb_corr) / 2 # higher = better
81+
82+
# 3. Channel correlation (RGB should be correlated in natural images)
83+
r, g, b = arr[:, :, 0].flatten(), arr[:, :, 1].flatten(), arr[:, :, 2].flatten()
84+
ch_corr = (np.corrcoef(r, g)[0, 1] + np.corrcoef(r, b)[0, 1]) / 2
85+
86+
# Combined score: weighted sum (all terms: higher = better)
87+
score = -row_jumps + 10 * edge_corr + 5 * ch_corr
88+
return score
89+
90+
5891
# ============================================================================
5992
# Phase 1: Local Preparation (Mac MPS)
6093
# ============================================================================
@@ -281,8 +314,7 @@ def phase2_generate_on_gpu():
281314
print(f" {name}: Generating portrait...")
282315
print(f" Prompt: {prompt[:70]}...")
283316

284-
best_frame = None
285-
best_seed = -1
317+
candidates = []
286318
for seed_offset in range(3): # 3 candidates
287319
seed = 42 + seed_offset
288320
output = pipeline.generate(
@@ -300,15 +332,17 @@ def phase2_generate_on_gpu():
300332
candidate = output.frames[mid]
301333
candidate_path = PORTRAIT_DIR / f"{name}_candidate_{seed_offset}.png"
302334
candidate.save(str(candidate_path))
303-
print(f" Candidate {seed_offset} (seed={output.seed}): {candidate_path}")
304-
if best_frame is None:
305-
best_frame = candidate
306-
best_seed = output.seed
307-
308-
if best_frame:
335+
score = _score_portrait(candidate)
336+
candidates.append((candidate, output.seed, score))
337+
print(f" Candidate {seed_offset} (seed={output.seed}): score={score:.2f}")
338+
339+
if candidates:
340+
# Pick the candidate with highest quality score
341+
candidates.sort(key=lambda x: x[2], reverse=True)
342+
best_frame, best_seed, best_score = candidates[0]
309343
best_frame.save(str(portrait_path))
310344
portrait_results[name] = str(portrait_path)
311-
print(f" → Default: {portrait_path} (seed={best_seed})")
345+
print(f" → Best: {portrait_path} (seed={best_seed}, score={best_score:.2f})")
312346

313347
# --- Step 2.3: Reload as I2V pipeline for shot generation ---
314348
print("\n--- Step 2.3: Reloading pipeline in I2V mode ---")

0 commit comments

Comments
 (0)