@@ -139,53 +139,59 @@ export async function loadAudio(
139139 target_sr ?: number
140140) : Promise < RawAudioData > {
141141 console . log ( `Loading audio file: ${ file . name } ` ) ;
142- const tempInputFilename = `input_${ Date . now ( ) } _${ file . name } ` ;
143- const tempOutputFilename = `output_${ Date . now ( ) } .pcm_f32le` ; // PCM float 32-bit little-endian
142+ const uniqueId = Date . now ( ) ;
143+ const tempInputFilename = `input_${ uniqueId } _${ file . name } ` ;
144+ const tempDecodedPcmFilename = `decoded_${ uniqueId } .pcm_f32le` ;
145+ const tempTrimmedPcmFilename = `trimmed_${ uniqueId } .pcm_f32le` ;
144146
145147 await ffmpeg . writeFile ( tempInputFilename , await fetchFile ( file ) ) ;
146148
147- const command : string [ ] = [ '-i' , tempInputFilename ] ;
149+ // Step 1: Decode to raw PCM (f32le)
150+ const decodeCommand : string [ ] = [ '-i' , tempInputFilename ] ;
148151 if ( target_sr ) {
149- command . push ( '-ar' , target_sr . toString ( ) ) ;
152+ decodeCommand . push ( '-ar' , target_sr . toString ( ) ) ;
150153 }
151- // Add format specific commands for PCM f32le output
152- // -f s16le (16-bit signed little-endian)
153- // -f f32le (32-bit float little-endian)
154- // -acodec pcm_s16le / pcm_f32le
155- command . push ( '-f' , 'f32le' , '-acodec' , 'pcm_f32le' , tempOutputFilename ) ;
154+ // Decode to pcm_f32le, always set 2 channels for intermediate processing uniformity if original is mono
155+ // The final mix will handle channel layout if needed, but internal processing benefits from consistent channel count.
156+ decodeCommand . push ( '-ac' , '2' , '-f' , 'f32le' , '-acodec' , 'pcm_f32le' , tempDecodedPcmFilename ) ;
156157
157- console . log ( 'Executing FFmpeg command:' , command . join ( ' ' ) ) ;
158- await ffmpeg . exec ( command ) ;
158+ console . log ( 'Executing FFmpeg decode command:' , decodeCommand . join ( ' ' ) ) ;
159+ await ffmpeg . exec ( decodeCommand ) ;
159160
160- const data = await ffmpeg . readFile ( tempOutputFilename ) ;
161-
162- // TODO: Need to get sampleRate and channels from ffmpeg.probe or ffprobe command
163- // For now, assuming we know it or can derive it. If target_sr is set, use it.
164- // FFmpeg typically outputs raw PCM without a header, so metadata needs to be known.
165- // One way is to run an ffprobe command first, or parse ffmpeg output logs if they contain this info.
166- const sampleRate = target_sr || 48000 ; // Placeholder, MUST BE DETERMINED
167- const channels = 2 ; // Placeholder, MUST BE DETERMINED
161+ // Determine actual sample rate and channels *after* decoding (and potential resampling)
162+ // This is crucial for the silenceremove filter and subsequent processing.
163+ // For now, if target_sr is set, we use it. Otherwise, we need to probe.
164+ // Ideally, ffprobe would be used here on tempInputFilename if target_sr is not given.
165+ // Let's assume target_sr is always provided for now, or default to a common rate.
166+ const currentSampleRate = target_sr || 48000 ; // Fallback, but should be derived
167+ const currentChannels = 2 ; // We forced stereo output in the decode step
168168
169- // A more robust way would be to use ffprobe (if available as a separate command or via ffmpeg.wasm complex commands)
170- // or parse ffmpeg's stderr output for stream information.
171- // For now, we'll use a placeholder. This needs to be addressed for correctness.
172- // Example of how one might try to get info (conceptual):
173- // await ffmpeg.exec(['-i', tempInputFilename, '-hide_banner', '-f', 'null', '-']); // This prints info to logs
174- // Then parse logs for sample_rate and channels from the input stream.
175- // The output sample rate would be target_sr if specified.
169+ // Step 2: Apply silenceremove to the decoded PCM
170+ // Parameters for silenceremove:
171+ // start_periods=1: Detect silence at the start.
172+ // start_duration=0.02: Minimum duration of 20ms silence to be removed.
173+ // start_threshold=-50dB: Silence threshold.
174+ // These might need tuning.
175+ const silenceRemoveCommand : string [ ] = [
176+ '-f' , 'f32le' , '-ar' , currentSampleRate . toString ( ) , '-ac' , currentChannels . toString ( ) , '-i' , tempDecodedPcmFilename ,
177+ '-af' , 'silenceremove=start_periods=1:start_duration=0.02:start_threshold=-50dB' ,
178+ '-f' , 'f32le' , '-ar' , currentSampleRate . toString ( ) , '-ac' , currentChannels . toString ( ) , '-acodec' , 'pcm_f32le' , tempTrimmedPcmFilename
179+ ] ;
176180
177- // If target_sr IS NOT set, we need to find the original sample rate.
178- // This is a CRITICAL part to implement correctly.
179- // For now, we assume the calling code will handle or know the sample rate.
181+ console . log ( 'Executing FFmpeg silenceremove command:' , silenceRemoveCommand . join ( ' ' ) ) ;
182+ await ffmpeg . exec ( silenceRemoveCommand ) ;
180183
184+ const data = await ffmpeg . readFile ( tempTrimmedPcmFilename ) ;
185+
181186 // Clean up temporary files in virtual FS
182187 await ffmpeg . deleteFile ( tempInputFilename ) ;
183- await ffmpeg . deleteFile ( tempOutputFilename ) ;
188+ await ffmpeg . deleteFile ( tempDecodedPcmFilename ) ;
189+ await ffmpeg . deleteFile ( tempTrimmedPcmFilename ) ;
184190
185191 return {
186192 samples : new Float32Array ( ( data as Uint8Array ) . buffer ) ,
187- sampleRate,
188- channels,
193+ sampleRate : currentSampleRate , // Use the rate confirmed/set during decoding
194+ channels : currentChannels , // Use the channels confirmed/set during decoding
189195 } ;
190196}
191197
@@ -737,22 +743,48 @@ export async function mixAndNormalize(
737743 // Check phase coherence AFTER alignment (this is critical to verify alignment quality)
738744 await reportProgress ( "Checking post-alignment coherence..." , 40 ) ;
739745
740- const weights : number [ ] = [ 1.0 ] ; // Start with reference weight (always 1.0)
741- console . log ( "\nPhase coherence after alignment:" ) ;
746+ // Calculate RMS levels for each track to help with mixing weights
/**
 * Computes the root-mean-square level of a sample buffer.
 *
 * @param samples - Sample values to measure.
 * @returns The square root of the mean of the squared samples, or 0 when
 *   the buffer is empty.
 */
const calculateRMS = (samples: Float32Array): number => {
  // An empty buffer would otherwise evaluate Math.sqrt(0 / 0) and yield NaN,
  // which would leak into the logged levels and the RMS-ratio weighting.
  if (samples.length === 0) {
    return 0;
  }

  let sumOfSquares = 0;
  for (const sample of samples) {
    sumOfSquares += sample * sample;
  }
  return Math.sqrt(sumOfSquares / samples.length);
};
754+
755+ const refRMS = calculateRMS ( ref . samples ) ;
756+ console . log ( `Reference track RMS: ${ refRMS . toFixed ( 4 ) } ` ) ;
757+
758+ // Start with reference weight (always 1.0)
759+ const weights : number [ ] = [ 1.0 ] ;
760+ console . log ( "\nPhase coherence and RMS after alignment:" ) ;
742761
743762 for ( let i = 0 ; i < alignedTracks . length ; i ++ ) {
744763 const track = alignedTracks [ i ] ;
745764 const postAlignCoherence = getPhaseCoherence ( ref . samples , track . samples ) ;
746- console . log ( `Track ${ i + 1 } post-alignment coherence: ${ postAlignCoherence . toFixed ( 4 ) } ` ) ;
765+ const trackRMS = calculateRMS ( track . samples ) ;
766+ console . log ( `Track ${ i + 1 } post-alignment coherence: ${ postAlignCoherence . toFixed ( 4 ) } , RMS: ${ trackRMS . toFixed ( 4 ) } ` ) ;
767+
768+ // Calculate weight based on both coherence and RMS ratio
769+ let weight = postAlignCoherence ;
747770
748- // Only use tracks with decent coherence after alignment
749- if ( postAlignCoherence < 0.1 ) {
771+ // Adjust weight based on RMS ratio to prevent quiet tracks
772+ if ( trackRMS > 0 ) {
773+ const rmsRatio = refRMS / trackRMS ;
774+ // If the track is significantly quieter, boost its weight
775+ if ( rmsRatio > 1.5 ) {
776+ weight *= Math . min ( rmsRatio , 2.0 ) ; // Cap the boost at 2x
777+ }
778+ }
779+
780+ // Ensure minimum contribution
781+ if ( weight < 0.1 ) {
750782 console . log ( `Warning: Very low post-alignment coherence for track ${ i + 1 } . Using minimal weight.` ) ;
751- weights . push ( 0.1 ) ; // Minimal contribution to avoid completely dropping it
752- } else {
753- weights . push ( postAlignCoherence ) ;
783+ weight = 0.1 ;
754784 }
755785
786+ weights . push ( weight ) ;
787+
756788 await reportProgress ( `Checked aligned track ${ i + 1 } /${ alignedTracks . length } ` , 40 + 10 * ( i / alignedTracks . length ) ) ;
757789 }
758790
0 commit comments