|
| 1 | +// Live Audio Transcription Example — Foundry Local JS SDK |
| 2 | +// |
| 3 | +// Demonstrates real-time microphone-to-text using the JS SDK. |
| 4 | +// Requires: npm install foundry-local-sdk naudiodon2 |
| 5 | +// |
| 6 | +// Usage: node app.js |
| 7 | + |
| 8 | +import { FoundryLocalManager } from 'foundry-local-sdk'; |
| 9 | + |
// Print the startup banner row by row, followed by one blank line.
for (const bannerRow of [
  '╔══════════════════════════════════════════════════════════╗',
  '║ Foundry Local — Live Audio Transcription (JS SDK) ║',
  '╚══════════════════════════════════════════════════════════╝',
]) {
  console.log(bannerRow);
}
console.log();

// Create the SDK manager; later steps reach the model catalog through it.
console.log('Initializing Foundry Local SDK...');
const manager = FoundryLocalManager.create({ appName: 'foundry_local_live_audio', logLevel: 'info' });
console.log('✓ SDK initialized');
| 22 | + |
// Look up the transcription model by its catalog alias and stop early when the
// catalog has no matching entry.
const modelAlias = 'nemotron';
// `const` rather than `let`: the binding is never reassigned after the lookup.
const model = await manager.catalog.getModel(modelAlias);
if (!model) {
  console.error(`ERROR: Model "${modelAlias}" not found in catalog.`);
  process.exit(1);
}

console.log(`Found model: ${model.id}`);

// The download callback receives a numeric progress value that is printed as a
// percentage; the leading `\r` redraws the same terminal line on every update.
console.log('Downloading model (if needed)...');
await model.download((progress) => {
  process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
});
// The leading `\n` moves past the in-place progress line.
console.log('\n✓ Model downloaded');

console.log('Loading model...');
await model.load();
console.log('✓ Model loaded');
| 41 | + |
// Build a live transcription session from the loaded model's audio client.
const audioClient = model.createAudioClient();
const session = audioClient.createLiveTranscriptionSession();

// Describe the audio format that will be appended to the session.
Object.assign(session.settings, {
  sampleRate: 16000, // Default is 16000; shown here for clarity
  channels: 1,
  bitsPerSample: 16,
  language: 'en',
});

console.log('Starting streaming session...');
await session.start();
console.log('✓ Session started');
| 53 | + |
// Read transcription results in the background. The promise is kept so the
// SIGINT handler can wait for the reader to finish before exiting.
const readPromise = (async () => {
  try {
    for await (const result of session.getTranscriptionStream()) {
      // Default to an empty string so a result without text never prints
      // the literal "undefined" (or "null") on a [FINAL] line.
      const text = result.content?.[0]?.text ?? '';
      if (result.is_final) {
        // End the line of partial output, then print the final text on its own line.
        console.log();
        console.log(` [FINAL] ${text}`);
      } else if (text) {
        // Partial results are written inline without a trailing newline.
        process.stdout.write(text);
      }
    }
  } catch (err) {
    // AbortError is not reported; any other failure is logged. Optional
    // chaining keeps the handler safe when a non-Error value is thrown.
    if (err?.name !== 'AbortError') {
      console.error('Stream error:', err?.message ?? err);
    }
  }
})();
| 72 | + |
// --- Microphone capture ---
// This example uses naudiodon2 for cross-platform audio capture.
// Install with: npm install naudiodon2
//
// If you prefer a different audio library, just push PCM bytes
// (16-bit signed LE, mono, 16kHz) via session.append().

/**
 * Generate a mono sine tone as 16-bit signed little-endian PCM at half of
 * full-scale amplitude.
 *
 * @param {number} sampleRate - Samples per second.
 * @param {number} durationSeconds - Length of the tone in seconds.
 * @param {number} frequencyHz - Tone frequency in hertz.
 * @returns {Uint8Array} Raw PCM bytes, two bytes per sample.
 */
function createSinePcm(sampleRate, durationSeconds, frequencyHz) {
  const totalSamples = Math.floor(sampleRate * durationSeconds);
  const pcmBytes = new Uint8Array(totalSamples * 2);
  const view = new DataView(pcmBytes.buffer);
  for (let i = 0; i < totalSamples; i++) {
    const t = i / sampleRate;
    const sample = Math.round(32767 * 0.5 * Math.sin(2 * Math.PI * frequencyHz * t));
    view.setInt16(i * 2, sample, true); // true = little-endian
  }
  return pcmBytes;
}

let audioInput;
try {
  const { default: portAudio } = await import('naudiodon2');
  const { channels, bitsPerSample, sampleRate } = session.settings;

  audioInput = portAudio.AudioIO({
    inOptions: {
      channelCount: channels,
      sampleFormat: bitsPerSample === 16
        ? portAudio.SampleFormat16Bit
        : portAudio.SampleFormat32Bit,
      sampleRate,
      // 100ms chunks, derived from the configured rate instead of a fixed 1600.
      framesPerBuffer: Math.round(sampleRate / 10),
      maxQueue: 15, // buffer during event-loop blocks from sync FFI calls
    },
  });

  // Only one append is in flight at a time; buffers that arrive while the
  // previous append is still pending are dropped rather than queued.
  let appendPending = false;
  audioInput.on('data', (buffer) => {
    if (appendPending) return; // drop frame while backpressured
    const pcm = new Uint8Array(buffer);
    appendPending = true;
    session.append(pcm)
      .catch((err) => {
        console.error('append error:', err?.message ?? err);
      })
      .finally(() => {
        appendPending = false;
      });
  });

  // Start capture before announcing it, so the banner is never shown when
  // the stream fails to start.
  audioInput.start();

  console.log();
  console.log('════════════════════════════════════════════════════════════');
  console.log(' LIVE TRANSCRIPTION ACTIVE');
  console.log(' Speak into your microphone.');
  console.log(' Press Ctrl+C to stop.');
  console.log('════════════════════════════════════════════════════════════');
  console.log();
} catch (err) {
  console.warn('⚠ Could not initialize microphone (naudiodon2 may not be installed).');
  // Surface the real cause: this path is also reached when the module is
  // installed but creating or starting the input stream fails.
  console.warn(` Reason: ${err?.message ?? err}`);
  console.warn(' Install with: npm install naudiodon2');
  console.warn(' Falling back to synthetic audio test...');
  console.warn();

  // Fallback: push 2 seconds of synthetic PCM (440Hz sine wave)
  const sampleRate = session.settings.sampleRate;
  const pcmBytes = createSinePcm(sampleRate, 2, 440);

  // Push in roughly 100ms chunks. The chunk is a whole number of 16-bit
  // samples so no sample is ever split across two appends.
  const chunkSize = Math.max(1, Math.floor(sampleRate / 10)) * 2;
  for (let offset = 0; offset < pcmBytes.length; offset += chunkSize) {
    // slice() copies and clamps to the end of the buffer for the last chunk.
    await session.append(pcmBytes.slice(offset, offset + chunkSize));
  }

  console.log('✓ Synthetic audio pushed');
}
| 145 | + |
// Handle graceful shutdown on Ctrl+C.
// `once` is used so that cleanup cannot be re-entered by a repeated Ctrl+C;
// after the first signal this listener is removed and no longer intercepts SIGINT.
process.once('SIGINT', async () => {
  console.log('\n\nStopping...');
  let exitCode = 0;

  // Stop capture and the session, then wait for the background reader.
  try {
    if (audioInput) {
      audioInput.quit();
    }
    await session.stop();
    await readPromise;
  } catch (err) {
    exitCode = 1;
    console.error('Shutdown error:', err?.message ?? err);
  }

  // Unload the model even if stopping the session failed.
  try {
    await model.unload();
  } catch (err) {
    exitCode = 1;
    console.error('Unload error:', err?.message ?? err);
  }

  if (exitCode === 0) {
    console.log('✓ Done');
  }
  process.exit(exitCode);
});
0 commit comments