@@ -464,8 +464,12 @@ export function findDelayLinearPhase(x: RawAudioData, y: RawAudioData, freq_band
464464 const x_mono = x . samples ;
465465 const y_mono = y . samples ;
466466
467+ // For codec delay detection, we want to look at the first few seconds
468+ // where the codec delays are most apparent
469+ const INITIAL_WINDOW = x . sampleRate * 5 ; // 5 seconds for initial codec delay detection
470+
467471 // Ensure equal-sized chunks for initial processing
468- const minLen = Math . min ( x_mono . length , y_mono . length ) ;
472+ const minLen = Math . min ( x_mono . length , y_mono . length , INITIAL_WINDOW ) ;
469473 const x_slice = x_mono . subarray ( 0 , minLen ) ;
470474 const y_slice = y_mono . subarray ( 0 , minLen ) ;
471475
@@ -476,43 +480,24 @@ export function findDelayLinearPhase(x: RawAudioData, y: RawAudioData, freq_band
476480 const delays : number [ ] = [ ] ;
477481 const weights : number [ ] = [ ] ;
478482
479- // Max length for individual band signals when performing cross-correlation
480- // 2 seconds is usually sufficient for finding codec/metadata delays
481- const MAX_LEN_XCORR_BAND = x . sampleRate * 2 ; // 2 seconds at signal's sample rate
482-
483483 // Process each frequency band
484484 for ( let i = 0 ; i < freq_bands ; i ++ ) {
485- const xb_full = x_bands [ i ] ;
486- const yb_full = y_bands [ i ] ;
487-
488- // For cross-correlation, we can use a shorter window to focus on transients
489- // but we'll use the full signal for coherence calculation
490- let xb_for_xcorr = xb_full ;
491- let yb_for_xcorr = yb_full ;
492-
493- if ( xb_full . length > MAX_LEN_XCORR_BAND ) {
494- xb_for_xcorr = xb_full . subarray ( 0 , MAX_LEN_XCORR_BAND ) ;
495- }
496- if ( yb_full . length > MAX_LEN_XCORR_BAND ) {
497- yb_for_xcorr = yb_full . subarray ( 0 , MAX_LEN_XCORR_BAND ) ;
498- }
499-
500- // Ensure they are the same length for cross-correlation
501- const commonLenXcorr = Math . min ( xb_for_xcorr . length , yb_for_xcorr . length ) ;
502- if ( commonLenXcorr === 0 ) {
503- console . log ( `Band ${ i + 1 } /${ freq_bands } : Skipped due to zero length for xcorr.` ) ;
504- delays . push ( 0 ) ;
505- weights . push ( 0 ) ;
506- continue ;
507- }
485+ const xb = x_bands [ i ] ;
486+ const yb = y_bands [ i ] ;
508487
509488 // Calculate cross-correlation using FFT
510- const correlationResult = calculateCrossCorrelation ( xb_for_xcorr , yb_for_xcorr ) ;
489+ const correlationResult = calculateCrossCorrelation ( xb , yb ) ;
511490
512491 // Find the delay that maximizes correlation
513492 let maxIdx = 0 ;
514493 let maxVal = - Infinity ;
515- for ( let j = 0 ; j < correlationResult . length ; j ++ ) {
494+
495+ // Focus on the center portion of the correlation result
496+ // This helps avoid false matches at the edges
497+ const centerStart = Math . floor ( correlationResult . length * 0.25 ) ;
498+ const centerEnd = Math . floor ( correlationResult . length * 0.75 ) ;
499+
500+ for ( let j = centerStart ; j < centerEnd ; j ++ ) {
516501 if ( correlationResult [ j ] > maxVal ) {
517502 maxVal = correlationResult [ j ] ;
518503 maxIdx = j ;
@@ -523,8 +508,8 @@ export function findDelayLinearPhase(x: RawAudioData, y: RawAudioData, freq_band
523508 const mid = Math . floor ( correlationResult . length / 2 ) ;
524509 const delay = maxIdx - mid ;
525510
526- // Calculate phase coherence using the full band signals for better accuracy
527- const coherence = getPhaseCoherence ( xb_full , yb_full ) ;
511+ // Calculate phase coherence for weighting
512+ const coherence = getPhaseCoherence ( xb , yb ) ;
528513
529514 delays . push ( delay ) ;
530515 weights . push ( coherence ) ;
@@ -548,8 +533,12 @@ export function findDelayLinearPhase(x: RawAudioData, y: RawAudioData, freq_band
548533
549534 const finalDelayUncapped = Math . round ( weightedSum / weightSum ) ;
550535
551- // Cap the final delay to a reasonable maximum (e.g., 2 seconds) for codec/metadata delays
552- const MAX_EXPECTED_DELAY_SAMPLES = x . sampleRate * 2 ; // 2 seconds
536+ // Cap the final delay to a reasonable maximum for codec delays
537+ // Different codecs have different typical delays:
538+ // - MP3: ~576 samples
539+ // - AAC/M4A: ~1024 samples
540+ // - Opus: ~120 samples
541+ const MAX_EXPECTED_DELAY_SAMPLES = x . sampleRate * 0.1 ; // 100ms should cover most codec delays
553542 let finalDelay = finalDelayUncapped ;
554543 if ( Math . abs ( finalDelayUncapped ) > MAX_EXPECTED_DELAY_SAMPLES ) {
555544 console . warn ( `Calculated delay (${ finalDelayUncapped } samples) exceeds max expected (${ MAX_EXPECTED_DELAY_SAMPLES } samples). Capping.` ) ;
0 commit comments