Speech to text new note type: integrate whisper-like client #7320
Replies: 3 comments
-
|
For now we are not planning on adding new note types, due to the considerable maintenance burden, however it's an interesting idea to be implemented as some kind of plugin. |
Beta Was this translation helpful? Give feedback.
-
|
@rmkni I created this little RenderNote combo, that uses an openai compatible API to create notes with the transcribed content.
Notes:
HTML Note:

<style>
.voice-recorder { max-width: 600px; margin: 20px auto; padding: 20px; }
.voice-recorder button { padding: 10px 20px; margin: 5px; }
.voice-recorder button:disabled { opacity: 0.5; cursor: not-allowed; }
.voice-recorder .message { padding: 10px; margin: 10px 0; border: 1px solid; }
.voice-recorder .success { background: #d4edda; color: #155724; border-color: #c3e6cb; }
.voice-recorder .error { background: #f8d7da; color: #721c24; border-color: #f5c6cb; }
.voice-recorder audio { width: 100%; margin: 10px 0; }
</style>
<div class="voice-recorder">
<button id="startBtn">🔴 Start Recording</button>
<button id="stopBtn" disabled>⏸️ Stop</button>
<button id="stopAndUploadBtn" disabled>✅ Stop & Upload</button>
<div id="timer">00:00</div>
<div id="audioPreview"></div>
<div id="message"></div>
</div>
<!-- voice-recorder.js is included via trilium note-child feature -->

JS Frontend:

// Installation:
// Create labels: `#OPENAI_API_BASE="https://url" #OPENAI_API_KEY=sk-abc`
const AUDIO_FORMAT = 'audio/webm';
let mediaRecorder, audioChunks = [], startTime, timerInterval, audioBlob;
let $, timer, audioPreview, message;
async function initVoiceRecorder() {
$ = (sel) => api.$container.find(sel);
[timer, audioPreview, message] = ['#timer', '#audioPreview', '#message'].map(s => $(s)[0]);
$('#startBtn').on('click', startRecording);
$('#stopBtn').on('click', stopRecording);
$('#stopAndUploadBtn').on('click', stopAndUpload);
}
const updateTimer = () => {
const elapsed = Date.now() - startTime;
const totalSeconds = Math.floor(elapsed / 1000);
const minutes = Math.floor(totalSeconds / 60);
const seconds = totalSeconds % 60;
let dots = '';
for (let i = 0; i < totalSeconds; i++) {
if (i > 0 && i % 30 === 0) {
dots += '<br />';
}
dots += '.';
}
timer.innerHTML = `${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')} ${dots}`;
};
const showMessage = (text, type) => {
message.className = `message ${type}`;
message.innerHTML = text;
};
const hideMessage = () => message.className = message.innerHTML = '';
async function startRecording() {
try {
hideMessage();
audioPreview.innerHTML = '';
audioChunks = [];
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder = new MediaRecorder(stream, MediaRecorder.isTypeSupported(AUDIO_FORMAT) ? { mimeType: AUDIO_FORMAT } : {});
if (!MediaRecorder.isTypeSupported(AUDIO_FORMAT)) {
showMessage(`Format ${AUDIO_FORMAT} not supported. Using default.`, 'error');
}
mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
mediaRecorder.onstop = () => {
audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
audioPreview.innerHTML = `
<h3>Recording Raw</h3>
<audio controls src="${URL.createObjectURL(audioBlob)}"></audio>
<p>Size: ${(audioBlob.size / 1024).toFixed(2)} KB | Format: WEBM</p>
<button id="uploadBtn">Upload to Trilium</button>
`;
$('#uploadBtn').on('click', transcribeAndSave);
};
mediaRecorder.start();
startTime = Date.now();
timerInterval = setInterval(updateTimer, 100);
setButtonStates(true, false, false, 'Recording...');
} catch (error) {
showMessage(`Error: ${error.message}`, 'error');
}
}
const setButtonStates = (start, stop, upload, statusText) => {
$('#startBtn')[0].disabled = start;
$('#stopBtn')[0].disabled = stop;
$('#stopAndUploadBtn')[0].disabled = upload;
};
function stopRecording() {
if (mediaRecorder?.state === 'recording') {
mediaRecorder.stop();
mediaRecorder.stream.getTracks().forEach(t => t.stop());
clearInterval(timerInterval);
setButtonStates(false, true, true, 'Recording stopped');
}
}
async function stopAndUpload() {
if (mediaRecorder?.state === 'recording') {
mediaRecorder.stop();
mediaRecorder.stream.getTracks().forEach(t => t.stop());
clearInterval(timerInterval);
setButtonStates(true, true, true, 'Processing...');
await new Promise(resolve => mediaRecorder.addEventListener('stop', resolve, { once: true }));
await new Promise(resolve => setTimeout(resolve, 100));
if (audioBlob) await transcribeAndSave();
}
}
async function transcribeAudio(audioBlob, openaiApiKey, openaiApiBase) {
const formData = new FormData();
formData.append('file', audioBlob, 'recording.webm');
formData.append('model', 'openai/whisper-1');
const res = await fetch(`${openaiApiBase}/v1/audio/transcriptions`, {
method: 'POST',
headers: { 'Authorization': `Bearer ${openaiApiKey}` },
body: formData
});
if (!res.ok) throw new Error(`Transcription failed: ${res.status} - ${await res.text()}`);
return (await res.json()).text;
}
async function createVoiceNote(transcription, parentNoteId = '') {
const recordedDate = new Date();
const timestamp = recordedDate.toISOString().replace(/[:.]/g, '-').slice(0, -5);
const noteTitle = `Voice Note ${recordedDate.toLocaleString()}`;
const noteContent = `<h2>Transcription</h2><p>${transcription}</p><hr><p><em>Recorded on ${recordedDate.toLocaleString()}</em></p>`;
const voiceNoteId = await api.runOnBackend((parentNoteId, noteTitle, noteContent) => {
return api.createTextNote(parentNoteId, noteTitle, noteContent).note.noteId;
}, [parentNoteId, noteTitle, noteContent]);
await api.waitUntilSynced();
return voiceNoteId;
}
async function transcribeAndSave() {
const openaiApiKey = api.currentNote.getLabelValue('OPENAI_API_KEY');
const openaiApiBase = api.currentNote.getLabelValue('OPENAI_API_BASE') || 'https://api.openai.com';
if (!openaiApiKey) {
showMessage('Please add label #OPENAI_API_KEY to this note with your API key.', 'error');
return;
}
const uploadBtn = $('#uploadBtn');
uploadBtn.prop('disabled', true).text('Transcribing...');
try {
const transcription = await transcribeAudio(audioBlob, openaiApiKey, openaiApiBase);
uploadBtn.text('Creating note...');
const dayNote = await api.getTodayNote();
const voiceNoteId = await createVoiceNote(transcription, dayNote.noteId);
await api.activateNewNote(voiceNoteId);
api.showMessage('Created voice note');
showMessage(
`Successfully transcribed and uploaded!<br><strong>Transcription:</strong> "${transcription.substring(0, 100)}${transcription.length > 100 ? '...' : ''}"`,
'success'
);
setTimeout(() => {
audioPreview.innerHTML = '';
timer.textContent = '00:00';
hideMessage();
}, 5000);
} catch (error) {
showMessage(`Upload failed: ${error.message}`, 'error');
uploadBtn.prop('disabled', false).text('Upload to Trilium');
}
}
initVoiceRecorder(); |
Beta Was this translation helpful? Give feedback.
-
|
A more convenient implementation: |
Beta Was this translation helpful? Give feedback.


Uh oh!
There was an error while loading. Please reload this page.
-
Describe feature
Revolutionize note-taking by introducing a novel note type that triggers a voice command, capturing audio input and leveraging APIs for transcription and AI-powered reformulation, streamlining the process and boosting productivity.
Proposition to reuse an existing project like Whispering https://github.com/epicenter-so/epicenter/tree/main/apps/whispering. This tool runs only in the browser locally and directly calls APIs. Could be easy to integrate?
Additional Information
No response
Beta Was this translation helpful? Give feedback.
All reactions