fix: i LOVE dsp

modpotato · modpotato · commit c8339ef32374 · 2025-05-17T22:47:27.000-04:00
diff --git a/src/lib/audioProcessor.ts b/src/lib/audioProcessor.ts
@@ -139,53 +139,59 @@ export async function loadAudio(
     target_sr?: number
 ): Promise<RawAudioData> {
     console.log(`Loading audio file: ${file.name}`);
-    const tempInputFilename = `input_${Date.now()}_${file.name}`;
-    const tempOutputFilename = `output_${Date.now()}.pcm_f32le`; // PCM float 32-bit little-endian
+    const uniqueId = Date.now();
+    const tempInputFilename = `input_${uniqueId}_${file.name}`;
+    const tempDecodedPcmFilename = `decoded_${uniqueId}.pcm_f32le`;
+    const tempTrimmedPcmFilename = `trimmed_${uniqueId}.pcm_f32le`;
 
     await ffmpeg.writeFile(tempInputFilename, await fetchFile(file));
 
-    const command: string[] = ['-i', tempInputFilename];
+    // Step 1: Decode to raw PCM (f32le)
+    const decodeCommand: string[] = ['-i', tempInputFilename];
     if (target_sr) {
-        command.push('-ar', target_sr.toString());
+        decodeCommand.push('-ar', target_sr.toString());
     }
-    // Add format specific commands for PCM f32le output
-    // -f s16le (16-bit signed little-endian)
-    // -f f32le (32-bit float little-endian)
-    // -acodec pcm_s16le / pcm_f32le
-    command.push('-f', 'f32le', '-acodec', 'pcm_f32le', tempOutputFilename);
+    // Decode to pcm_f32le, always set 2 channels for intermediate processing uniformity if original is mono
+    // The final mix will handle channel layout if needed, but internal processing benefits from consistent channel count.
+    decodeCommand.push('-ac', '2', '-f', 'f32le', '-acodec', 'pcm_f32le', tempDecodedPcmFilename);
 
-    console.log('Executing FFmpeg command:', command.join(' '));
-    await ffmpeg.exec(command);
+    console.log('Executing FFmpeg decode command:', decodeCommand.join(' '));
+    await ffmpeg.exec(decodeCommand);
 
-    const data = await ffmpeg.readFile(tempOutputFilename);
-    
-    // TODO: Need to get sampleRate and channels from ffmpeg.probe or ffprobe command
-    // For now, assuming we know it or can derive it. If target_sr is set, use it.
-    // FFmpeg typically outputs raw PCM without a header, so metadata needs to be known.
-    // One way is to run an ffprobe command first, or parse ffmpeg output logs if they contain this info.
-    const sampleRate = target_sr || 48000; // Placeholder, MUST BE DETERMINED
-    const channels = 2; // Placeholder, MUST BE DETERMINED
+    // Determine actual sample rate and channels *after* decoding (and potential resampling)
+    // This is crucial for the silenceremove filter and subsequent processing.
+    // For now, if target_sr is set, we use it. Otherwise, we need to probe.
+    // Ideally, ffprobe would be used here on tempInputFilename if target_sr is not given.
+    // Let's assume target_sr is always provided for now, or default to a common rate.
+    const currentSampleRate = target_sr || 48000; // Fallback, but should be derived
+    const currentChannels = 2; // We forced stereo output in the decode step
 
-    // A more robust way would be to use ffprobe (if available as a separate command or via ffmpeg.wasm complex commands)
-    // or parse ffmpeg's stderr output for stream information.
-    // For now, we'll use a placeholder. This needs to be addressed for correctness.
-    // Example of how one might try to get info (conceptual):
-    // await ffmpeg.exec(['-i', tempInputFilename, '-hide_banner', '-f', 'null', '-']); // This prints info to logs
-    // Then parse logs for sample_rate and channels from the input stream. 
-    // The output sample rate would be target_sr if specified.
+    // Step 2: Apply silenceremove to the decoded PCM
+    // Parameters for silenceremove:
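+    // start_periods=1: Trim audio from the beginning until the first period of non-silence is found.
+    // start_duration=0.02: Non-silence must last at least 20ms before trimming stops (shorter bursts are trimmed as silence).
+    // start_threshold=-50dB: Samples below this level are treated as silence.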
+    // These might need tuning.
+    const silenceRemoveCommand: string[] = [
+        '-f', 'f32le', '-ar', currentSampleRate.toString(), '-ac', currentChannels.toString(), '-i', tempDecodedPcmFilename,
+        '-af', 'silenceremove=start_periods=1:start_duration=0.02:start_threshold=-50dB',
+        '-f', 'f32le', '-ar', currentSampleRate.toString(), '-ac', currentChannels.toString(), '-acodec', 'pcm_f32le', tempTrimmedPcmFilename
+    ];
 
-    // If target_sr IS NOT set, we need to find the original sample rate.
-    // This is a CRITICAL part to implement correctly.
-    // For now, we assume the calling code will handle or know the sample rate.
+    console.log('Executing FFmpeg silenceremove command:', silenceRemoveCommand.join(' '));
+    await ffmpeg.exec(silenceRemoveCommand);
 
+    const data = await ffmpeg.readFile(tempTrimmedPcmFilename);
+    
     // Clean up temporary files in virtual FS
     await ffmpeg.deleteFile(tempInputFilename);
-    await ffmpeg.deleteFile(tempOutputFilename);
+    await ffmpeg.deleteFile(tempDecodedPcmFilename);
+    await ffmpeg.deleteFile(tempTrimmedPcmFilename);
 
     return {
         samples: new Float32Array((data as Uint8Array).buffer),
-        sampleRate,
-        channels,
+        sampleRate: currentSampleRate, // Use the rate confirmed/set during decoding
+        channels: currentChannels,     // Use the channels confirmed/set during decoding
     };
 }
 
@@ -737,22 +743,48 @@ export async function mixAndNormalize(
   // Check phase coherence AFTER alignment (this is critical to verify alignment quality)
   await reportProgress("Checking post-alignment coherence...", 40);
   
-  const weights: number[] = [1.0]; // Start with reference weight (always 1.0)
-  console.log("\nPhase coherence after alignment:");
+  // RMS level of a sample buffer (square root of the mean of the squared samples); used below to help with mixing weights
+  const calculateRMS = (samples: Float32Array): number => {
+    let sum = 0; // running sum of squared sample values
+    for (let i = 0; i < samples.length; i++) {
+      sum += samples[i] * samples[i];
+    }
+    return Math.sqrt(sum / samples.length); // NOTE(review): an empty buffer gives 0 / 0 = NaN here — confirm callers never pass one
+  };
+
+  const refRMS = calculateRMS(ref.samples);
+  console.log(`Reference track RMS: ${refRMS.toFixed(4)}`);
+  
+  // Start with reference weight (always 1.0)
+  const weights: number[] = [1.0];
+  console.log("\nPhase coherence and RMS after alignment:");
   
   for (let i = 0; i < alignedTracks.length; i++) {
     const track = alignedTracks[i];
     const postAlignCoherence = getPhaseCoherence(ref.samples, track.samples);
-    console.log(`Track ${i+1} post-alignment coherence: ${postAlignCoherence.toFixed(4)}`);
+    const trackRMS = calculateRMS(track.samples);
+    console.log(`Track ${i+1} post-alignment coherence: ${postAlignCoherence.toFixed(4)}, RMS: ${trackRMS.toFixed(4)}`);
+    
+    // Calculate weight based on both coherence and RMS ratio
+    let weight = postAlignCoherence;
     
-    // Only use tracks with decent coherence after alignment
-    if (postAlignCoherence < 0.1) {
+    // Adjust weight based on RMS ratio to prevent quiet tracks
+    if (trackRMS > 0) {
+      const rmsRatio = refRMS / trackRMS;
+      // If the track is significantly quieter, boost its weight
+      if (rmsRatio > 1.5) {
+        weight *= Math.min(rmsRatio, 2.0); // Cap the boost at 2x
+      }
+    }
+    
+    // Ensure minimum contribution
+    if (weight < 0.1) {
       console.log(`Warning: Very low post-alignment coherence for track ${i+1}. Using minimal weight.`);
-      weights.push(0.1); // Minimal contribution to avoid completely dropping it
-    } else {
-      weights.push(postAlignCoherence);
+      weight = 0.1;
     }
     
+    weights.push(weight);
+    
     await reportProgress(`Checked aligned track ${i+1}/${alignedTracks.length}`, 40 + 10 * (i / alignedTracks.length));
   }