5555BGM_FILENAME = "PeriTune-Wuxia3.mp3"
5656
5757
def _score_portrait(img):
    """Score portrait quality (higher = better); unusable frames score -inf.

    Uses multiple heuristics since VAE corruption can be statistically subtle:
      1. Row-to-row pixel jumps (banding artifacts) -- lower is better.
      2. Inter-channel Laplacian correlation (edges should line up across
         R, G and B; misalignment shows up as colour fringing).
      3. Raw channel correlation (R, G and B decorrelate in corrupted frames).

    Args:
        img: Anything ``np.array`` can convert to an ``(H, W, C>=3)`` array
            (only the first three channels are scored), or an ``(H, W)``
            grayscale array, which is treated as three identical channels.

    Returns:
        A Python float, ``-row_jumps + 10 * edge_corr + 5 * ch_corr``.
        ``float("-inf")`` is returned when the score is undefined: non-finite
        pixels, an image smaller than 3x3, or a channel with zero variance
        (e.g. an all-black frame). This keeps NaN out of the caller's sort,
        where it would otherwise be ordered unpredictably and could win.
    """
    import numpy as np

    worst = float("-inf")
    arr = np.array(img, dtype=np.float32)

    # Grayscale input: replicate so the per-channel maths below still applies.
    if arr.ndim == 2:
        arr = np.stack([arr, arr, arr], axis=-1)

    # The 3x3 Laplacian stencil needs at least one interior pixel, and NaN/inf
    # pixels would poison every statistic below.
    if arr.shape[0] < 3 or arr.shape[1] < 3 or not np.isfinite(arr).all():
        return worst

    # 1. Row smoothness (lower row_jumps = smoother = better).
    row_jumps = np.abs(np.diff(arr, axis=0)).mean()

    # 2. Inter-channel Laplacian correlation, computed for R, G, B at once
    #    with a 4-neighbour stencil on the interior pixels.
    rgb = arr[:, :, :3]
    lap = (
        rgb[2:, 1:-1]
        + rgb[:-2, 1:-1]
        + rgb[1:-1, 2:]
        + rgb[1:-1, :-2]
        - 4 * rgb[1:-1, 1:-1]
    )

    # A zero-variance channel makes corrcoef divide 0 by 0; silence the
    # warning here and handle the resulting NaN explicitly below.
    with np.errstate(invalid="ignore", divide="ignore"):
        edge = np.corrcoef(lap.reshape(-1, 3).T)
        # 3. Channel correlation (RGB should be correlated in natural images).
        chan = np.corrcoef(rgb.reshape(-1, 3).T)

    edge_corr = (edge[0, 1] + edge[0, 2]) / 2  # R-G and R-B; higher = better
    ch_corr = (chan[0, 1] + chan[0, 2]) / 2

    # Combined score: weighted sum (all terms: higher = better).
    score = float(-row_jumps + 10 * edge_corr + 5 * ch_corr)
    return score if np.isfinite(score) else worst
89+
90+
5891# ============================================================================
5992# Phase 1: Local Preparation (Mac MPS)
6093# ============================================================================
@@ -281,8 +314,7 @@ def phase2_generate_on_gpu():
281314 print (f" { name } : Generating portrait..." )
282315 print (f" Prompt: { prompt [:70 ]} ..." )
283316
284- best_frame = None
285- best_seed = - 1
317+ candidates = []
286318 for seed_offset in range (3 ): # 3 candidates
287319 seed = 42 + seed_offset
288320 output = pipeline .generate (
@@ -300,15 +332,17 @@ def phase2_generate_on_gpu():
300332 candidate = output .frames [mid ]
301333 candidate_path = PORTRAIT_DIR / f"{ name } _candidate_{ seed_offset } .png"
302334 candidate .save (str (candidate_path ))
303- print (f" Candidate { seed_offset } (seed={ output .seed } ): { candidate_path } " )
304- if best_frame is None :
305- best_frame = candidate
306- best_seed = output .seed
307-
308- if best_frame :
335+ score = _score_portrait (candidate )
336+ candidates .append ((candidate , output .seed , score ))
337+ print (f" Candidate { seed_offset } (seed={ output .seed } ): score={ score :.2f} " )
338+
339+ if candidates :
340+ # Pick the candidate with highest quality score
341+ candidates .sort (key = lambda x : x [2 ], reverse = True )
342+ best_frame , best_seed , best_score = candidates [0 ]
309343 best_frame .save (str (portrait_path ))
310344 portrait_results [name ] = str (portrait_path )
311- print (f" → Default : { portrait_path } (seed={ best_seed } )" )
345+ print (f" → Best : { portrait_path } (seed={ best_seed } , score= { best_score :.2f } )" )
312346
313347 # --- Step 2.3: Reload as I2V pipeline for shot generation ---
314348 print ("\n --- Step 2.3: Reloading pipeline in I2V mode ---" )
0 commit comments