Skip to content

Commit b773741

Browse files
committed
feat: Add RMS-based silence detection and improve noise handling with new keywords and UI updates.
1 parent 7a9b838 commit b773741

File tree

1 file changed

+65
-5
lines changed

1 file changed

+65
-5
lines changed

application/src/main.js

Lines changed: 65 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@ function loadConfig() {
108108
};
109109

110110
console.log("MAIN: Store Path:", store.path);
111-
log.info("Loaded Config:", config);
112111
return config;
113112
}
114113

@@ -647,6 +646,44 @@ async function transcribe() {
647646
});
648647
}
649648

649+
/**
 * Computes the root-mean-square (RMS) amplitude of the audio samples that
 * follow a 44-byte WAV header, treating them as signed 16-bit little-endian
 * PCM values.
 *
 * NOTE(review): the fixed 44-byte offset only matches a canonical WAV header.
 * If the recorder ever emits extra chunks before the sample data, those bytes
 * would be counted as audio — confirm against the producer of this buffer.
 *
 * @param {ArrayLike<number>} buffer - Raw bytes of the WAV file, header
 *   included. Any indexable byte sequence with a `length` works.
 * @returns {number} RMS of the samples, or 0 when no complete 16-bit sample
 *   follows the header.
 */
function calculateRMS(buffer) {
  const WAV_HEADER_SIZE = 44;
  const BYTES_PER_SAMPLE = 2;

  // A trailing odd byte is ignored: only complete 2-byte samples are counted.
  const numSamples = Math.floor(
    (buffer.length - WAV_HEADER_SIZE) / BYTES_PER_SAMPLE
  );

  // Covers buffers shorter than the header AND buffers with a single stray
  // byte after it; without this guard the latter would compute 0 / 0 = NaN.
  if (numSamples <= 0) {
    return 0;
  }

  let sumSquares = 0;
  for (let i = 0; i < numSamples; i++) {
    const byteIndex = WAV_HEADER_SIZE + i * BYTES_PER_SAMPLE;
    // Little-endian: low byte first, high byte second.
    const unsigned = buffer[byteIndex] | (buffer[byteIndex + 1] << 8);
    // Sign-extend the low 16 bits to get the signed sample value.
    const sample = (unsigned << 16) >> 16;
    sumSquares += sample * sample;
  }

  return Math.sqrt(sumSquares / numSamples);
}
686+
650687
// ============================================================================
651688
// DIRECT TEXT INSERTION (no clipboard)
652689
// ============================================================================
@@ -739,6 +776,10 @@ async function handleRecordingComplete() {
739776
"(music)",
740777
"[silence]",
741778
"[silêncio]",
779+
"música",
780+
"music",
781+
"sous-titres",
782+
"subtitle",
742783
"...",
743784
];
744785

@@ -748,11 +789,9 @@ async function handleRecordingComplete() {
748789

749790
if (isNoise) {
750791
logToRenderer(`⚠️ Ruído ignorado: "${text}"`);
751-
mainWindow?.webContents.send("transcription", {
752-
text: "Não consegui ouvir. Tente novamente.",
753-
isNoise: true,
754-
});
792+
hideOverlay();
755793
// Do NOT copy to clipboard or show system notification
794+
// Do NOT send error text to renderer
756795
} else {
757796
// Valid transcription
758797
// console.log("📝 Transcription:", text);
@@ -991,6 +1030,27 @@ ipcMain.handle("transcribe-audio", async (event, audioDataArray) => {
9911030
`📥 Recebido áudio do renderer: ${audioDataArray.length} bytes`
9921031
);
9931032

1033+
if (audioDataArray.length < 1000) {
1034+
logToRenderer("⚠️ Áudio muito curto/vazio (zero). Cancelando.");
1035+
hideOverlay();
1036+
return false;
1037+
}
1038+
1039+
// Calculate RMS (Root Mean Square) to detect silence
1040+
const rms = calculateRMS(audioDataArray);
1041+
logToRenderer(`📊 Energia do áudio (RMS): ${rms.toFixed(2)}`);
1042+
1043+
// Threshold for silence - 1500 is aggressive enough to filter typical room noise
1044+
const SILENCE_THRESHOLD = 1500;
1045+
1046+
if (rms < SILENCE_THRESHOLD) {
1047+
logToRenderer(
1048+
`⚠️ Silêncio detectado (RMS < ${SILENCE_THRESHOLD}). Abortando.`
1049+
);
1050+
hideOverlay();
1051+
return false;
1052+
}
1053+
9941054
try {
9951055
// Convert array back to buffer
9961056
const audioData = new Uint8Array(audioDataArray);

0 commit comments

Comments
 (0)