Skip to content

Commit c8339ef

Browse files
committed
fix: i LOVE dsp
1 parent 1dae7aa commit c8339ef

File tree

1 file changed

+72
-40
lines changed

1 file changed

+72
-40
lines changed

src/lib/audioProcessor.ts

Lines changed: 72 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -139,53 +139,59 @@ export async function loadAudio(
139139
target_sr?: number
140140
): Promise<RawAudioData> {
141141
console.log(`Loading audio file: ${file.name}`);
142-
const tempInputFilename = `input_${Date.now()}_${file.name}`;
143-
const tempOutputFilename = `output_${Date.now()}.pcm_f32le`; // PCM float 32-bit little-endian
142+
const uniqueId = Date.now();
143+
const tempInputFilename = `input_${uniqueId}_${file.name}`;
144+
const tempDecodedPcmFilename = `decoded_${uniqueId}.pcm_f32le`;
145+
const tempTrimmedPcmFilename = `trimmed_${uniqueId}.pcm_f32le`;
144146

145147
await ffmpeg.writeFile(tempInputFilename, await fetchFile(file));
146148

147-
const command: string[] = ['-i', tempInputFilename];
149+
// Step 1: Decode to raw PCM (f32le)
150+
const decodeCommand: string[] = ['-i', tempInputFilename];
148151
if (target_sr) {
149-
command.push('-ar', target_sr.toString());
152+
decodeCommand.push('-ar', target_sr.toString());
150153
}
151-
// Add format specific commands for PCM f32le output
152-
// -f s16le (16-bit signed little-endian)
153-
// -f f32le (32-bit float little-endian)
154-
// -acodec pcm_s16le / pcm_f32le
155-
command.push('-f', 'f32le', '-acodec', 'pcm_f32le', tempOutputFilename);
154+
// Decode to pcm_f32le and always force 2 channels so intermediate processing sees a uniform layout, even when the original is mono.
155+
// The final mix will handle channel layout if needed, but internal processing benefits from consistent channel count.
156+
decodeCommand.push('-ac', '2', '-f', 'f32le', '-acodec', 'pcm_f32le', tempDecodedPcmFilename);
156157

157-
console.log('Executing FFmpeg command:', command.join(' '));
158-
await ffmpeg.exec(command);
158+
console.log('Executing FFmpeg decode command:', decodeCommand.join(' '));
159+
await ffmpeg.exec(decodeCommand);
159160

160-
const data = await ffmpeg.readFile(tempOutputFilename);
161-
162-
// TODO: Need to get sampleRate and channels from ffmpeg.probe or ffprobe command
163-
// For now, assuming we know it or can derive it. If target_sr is set, use it.
164-
// FFmpeg typically outputs raw PCM without a header, so metadata needs to be known.
165-
// One way is to run an ffprobe command first, or parse ffmpeg output logs if they contain this info.
166-
const sampleRate = target_sr || 48000; // Placeholder, MUST BE DETERMINED
167-
const channels = 2; // Placeholder, MUST BE DETERMINED
161+
// Determine actual sample rate and channels *after* decoding (and potential resampling)
162+
// This is crucial for the silenceremove filter and subsequent processing.
163+
// For now, if target_sr is set, we use it. Otherwise, we need to probe.
164+
// Ideally, ffprobe would be used here on tempInputFilename if target_sr is not given.
165+
// Let's assume target_sr is always provided for now, or default to a common rate.
166+
const currentSampleRate = target_sr || 48000; // Fallback, but should be derived
167+
const currentChannels = 2; // We forced stereo output in the decode step
168168

169-
// A more robust way would be to use ffprobe (if available as a separate command or via ffmpeg.wasm complex commands)
170-
// or parse ffmpeg's stderr output for stream information.
171-
// For now, we'll use a placeholder. This needs to be addressed for correctness.
172-
// Example of how one might try to get info (conceptual):
173-
// await ffmpeg.exec(['-i', tempInputFilename, '-hide_banner', '-f', 'null', '-']); // This prints info to logs
174-
// Then parse logs for sample_rate and channels from the input stream.
175-
// The output sample rate would be target_sr if specified.
169+
// Step 2: Apply silenceremove to the decoded PCM
170+
// Parameters for silenceremove:
171+
// start_periods=1: Trim leading audio until the first period of non-silence is found.
172+
// start_duration=0.02: Non-silence must last at least 20ms before trimming stops (shorter bursts are treated as silence and trimmed).
173+
// start_threshold=-50dB: Silence threshold.
174+
// These might need tuning.
175+
const silenceRemoveCommand: string[] = [
176+
'-f', 'f32le', '-ar', currentSampleRate.toString(), '-ac', currentChannels.toString(), '-i', tempDecodedPcmFilename,
177+
'-af', 'silenceremove=start_periods=1:start_duration=0.02:start_threshold=-50dB',
178+
'-f', 'f32le', '-ar', currentSampleRate.toString(), '-ac', currentChannels.toString(), '-acodec', 'pcm_f32le', tempTrimmedPcmFilename
179+
];
176180

177-
// If target_sr IS NOT set, we need to find the original sample rate.
178-
// This is a CRITICAL part to implement correctly.
179-
// For now, we assume the calling code will handle or know the sample rate.
181+
console.log('Executing FFmpeg silenceremove command:', silenceRemoveCommand.join(' '));
182+
await ffmpeg.exec(silenceRemoveCommand);
180183

184+
const data = await ffmpeg.readFile(tempTrimmedPcmFilename);
185+
181186
// Clean up temporary files in virtual FS
182187
await ffmpeg.deleteFile(tempInputFilename);
183-
await ffmpeg.deleteFile(tempOutputFilename);
188+
await ffmpeg.deleteFile(tempDecodedPcmFilename);
189+
await ffmpeg.deleteFile(tempTrimmedPcmFilename);
184190

185191
return {
186192
samples: new Float32Array((data as Uint8Array).buffer),
187-
sampleRate,
188-
channels,
193+
sampleRate: currentSampleRate, // Use the rate confirmed/set during decoding
194+
channels: currentChannels, // Use the channels confirmed/set during decoding
189195
};
190196
}
191197

@@ -737,22 +743,48 @@ export async function mixAndNormalize(
737743
// Check phase coherence AFTER alignment (this is critical to verify alignment quality)
738744
await reportProgress("Checking post-alignment coherence...", 40);
739745

740-
const weights: number[] = [1.0]; // Start with reference weight (always 1.0)
741-
console.log("\nPhase coherence after alignment:");
746+
// Calculate RMS levels for each track to help with mixing weights
747+
const calculateRMS = (samples: Float32Array): number => {
748+
let sum = 0;
749+
for (let i = 0; i < samples.length; i++) {
750+
sum += samples[i] * samples[i];
751+
}
752+
return Math.sqrt(sum / samples.length);
753+
};
754+
755+
const refRMS = calculateRMS(ref.samples);
756+
console.log(`Reference track RMS: ${refRMS.toFixed(4)}`);
757+
758+
// Start with reference weight (always 1.0)
759+
const weights: number[] = [1.0];
760+
console.log("\nPhase coherence and RMS after alignment:");
742761

743762
for (let i = 0; i < alignedTracks.length; i++) {
744763
const track = alignedTracks[i];
745764
const postAlignCoherence = getPhaseCoherence(ref.samples, track.samples);
746-
console.log(`Track ${i+1} post-alignment coherence: ${postAlignCoherence.toFixed(4)}`);
765+
const trackRMS = calculateRMS(track.samples);
766+
console.log(`Track ${i+1} post-alignment coherence: ${postAlignCoherence.toFixed(4)}, RMS: ${trackRMS.toFixed(4)}`);
767+
768+
// Calculate weight based on both coherence and RMS ratio
769+
let weight = postAlignCoherence;
747770

748-
// Only use tracks with decent coherence after alignment
749-
if (postAlignCoherence < 0.1) {
771+
// Adjust weight based on RMS ratio so that quiet tracks are not under-represented in the mix
772+
if (trackRMS > 0) {
773+
const rmsRatio = refRMS / trackRMS;
774+
// If the track is significantly quieter, boost its weight
775+
if (rmsRatio > 1.5) {
776+
weight *= Math.min(rmsRatio, 2.0); // Cap the boost at 2x
777+
}
778+
}
779+
780+
// Ensure minimum contribution
781+
if (weight < 0.1) {
750782
console.log(`Warning: Very low post-alignment coherence for track ${i+1}. Using minimal weight.`);
751-
weights.push(0.1); // Minimal contribution to avoid completely dropping it
752-
} else {
753-
weights.push(postAlignCoherence);
783+
weight = 0.1;
754784
}
755785

786+
weights.push(weight);
787+
756788
await reportProgress(`Checked aligned track ${i+1}/${alignedTracks.length}`, 40 + 10 * (i / alignedTracks.length));
757789
}
758790

0 commit comments

Comments
 (0)