Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions desktop/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@
# Production: https://api.omi.me
OMI_API_URL=http://localhost:8080

# Deepgram API key — required for real-time transcription
DEEPGRAM_API_KEY=

# ─── AI (optional) ──────────────────────────────────────────────────
# Gemini API key for proactive assistants and embeddings
# Falls back to backend-side processing if not set
Expand Down
4 changes: 3 additions & 1 deletion desktop/CHANGELOG.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"unreleased": [],
"unreleased": [
"Removed client-side Deepgram API key — transcription now routes securely through the Omi backend"
],
"releases": [
{
"version": "0.11.90",
Expand Down
316 changes: 148 additions & 168 deletions desktop/Desktop/Sources/AppState.swift

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion desktop/Desktop/Sources/Audio/AudioSourceManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,6 @@ final class AudioSourceManager: ObservableObject {
// Start BLE audio processing with direct audio callback and WAL recording
await bleAudioService.startProcessing(
from: connection,
transcriptionService: nil, // We'll handle routing ourselves
audioDataHandler: { [weak self] pcmData in
// Convert decoded PCM mono to stereo and forward
self?.handleBleAudio(pcmData)
Expand Down
38 changes: 7 additions & 31 deletions desktop/Desktop/Sources/Audio/BleAudioService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ final class BleAudioService: ObservableObject {
private var cancellables = Set<AnyCancellable>()

// Audio delivery
private var transcriptionService: TranscriptionService?
private var audioSink: ((Data) -> Void)?
private var audioDataHandler: ((Data) -> Void)?
private var rawFrameHandler: ((Data) -> Void)?

Expand All @@ -44,12 +44,12 @@ final class BleAudioService: ObservableObject {
/// Start processing audio from a device connection
/// - Parameters:
/// - connection: The device connection to get audio from
/// - transcriptionService: Optional transcription service to send audio to
/// - audioSink: Optional closure to receive decoded mono PCM audio (e.g., send to transcription service)
/// - audioDataHandler: Optional handler for decoded PCM data (alternative to transcription)
/// - rawFrameHandler: Optional handler for raw encoded frames (for WAL recording)
func startProcessing(
from connection: DeviceConnection,
transcriptionService: TranscriptionService? = nil,
audioSink: ((Data) -> Void)? = nil,
audioDataHandler: ((Data) -> Void)? = nil,
rawFrameHandler: ((Data) -> Void)? = nil
) async {
Expand All @@ -58,7 +58,7 @@ final class BleAudioService: ObservableObject {
return
}

self.transcriptionService = transcriptionService
self.audioSink = audioSink
self.audioDataHandler = audioDataHandler
self.rawFrameHandler = rawFrameHandler

Expand Down Expand Up @@ -126,7 +126,7 @@ final class BleAudioService: ObservableObject {
cancellables.removeAll()

isProcessing = false
transcriptionService = nil
audioSink = nil
audioDataHandler = nil
rawFrameHandler = nil

Expand Down Expand Up @@ -194,37 +194,13 @@ final class BleAudioService: ObservableObject {
// Calculate audio level
updateAudioLevel(from: pcmData)

// Send to transcription service (mono channel)
if let transcription = transcriptionService {
// TranscriptionService expects stereo (2 channels) for multichannel transcription
// For BLE device audio, we duplicate to both channels (device is the "user")
let stereoData = convertToStereo(pcmData)
transcription.sendAudio(stereoData)
}
// Send decoded mono PCM to audio sink (e.g., transcription service)
audioSink?(pcmData)

// Send to custom handler
audioDataHandler?(pcmData)
}

/// Convert mono 16-bit PCM to stereo by duplicating each sample into both channels.
/// Mono:   [S0, S1, S2, ...]
/// Stereo: [S0, S0, S1, S1, S2, S2, ...] (interleaved)
/// - Parameter monoData: Single-channel Int16 PCM bytes (a trailing odd byte, if any, is ignored).
/// - Returns: Interleaved two-channel Int16 PCM, twice the input sample count.
private func convertToStereo(_ monoData: Data) -> Data {
    let sampleCount = monoData.count / 2
    // Build an Int16 buffer up front instead of appending 2-byte Data slices
    // per sample — avoids repeated Data reallocation and copying in the hot path.
    var stereoSamples = [Int16]()
    stereoSamples.reserveCapacity(sampleCount * 2)

    monoData.withUnsafeBytes { bytes in
        let samples = bytes.bindMemory(to: Int16.self)
        for sample in samples {
            // Write the same sample to both left and right channels.
            stereoSamples.append(sample)
            stereoSamples.append(sample)
        }
    }

    return stereoSamples.withUnsafeBufferPointer { buffer in
        Data(buffer: buffer)
    }
}

/// Calculate RMS audio level from PCM data
private func updateAudioLevel(from data: Data) {
var sumSquares: Float = 0
Expand Down
75 changes: 62 additions & 13 deletions desktop/Desktop/Sources/AudioMixer.swift
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
import Foundation

/// Mixes microphone and system audio into a stereo stream for multichannel transcription
/// Mixes microphone and system audio into a combined stream for transcription.
/// Supports stereo (interleaved mic+system) or mono (averaged) output.
/// In stereo mode: channel 0 (left) = microphone (user),
/// channel 1 (right) = system audio (others). Mono mode averages the two.
class AudioMixer {

// MARK: - Types

/// Callback for receiving stereo audio chunks
enum OutputMode {
case stereo // Interleaved [mic0, sys0, mic1, sys1, ...] — for Deepgram multichannel
case mono // Averaged (mic + system) / 2 — for backend /v4/listen
}

/// Callback for receiving mixed audio chunks
typealias StereoAudioHandler = (Data) -> Void

// MARK: - Properties

private var onStereoChunk: StereoAudioHandler?
private var isRunning = false
private(set) var outputMode: OutputMode = .stereo

// Audio buffers (16kHz mono Int16 PCM)
private var micBuffer = Data()
Expand All @@ -29,15 +36,18 @@ class AudioMixer {
// MARK: - Public Methods

/// Start the mixer
/// - Parameter onStereoChunk: Callback receiving interleaved stereo 16-bit PCM at 16kHz
func start(onStereoChunk: @escaping StereoAudioHandler) {
/// - Parameters:
/// - outputMode: `.stereo` for interleaved multichannel, `.mono` for averaged single-channel
/// - onStereoChunk: Callback receiving mixed 16-bit PCM at 16kHz
func start(outputMode: OutputMode = .stereo, onStereoChunk: @escaping StereoAudioHandler) {
bufferLock.lock()
self.outputMode = outputMode
self.onStereoChunk = onStereoChunk
self.isRunning = true
micBuffer = Data()
systemBuffer = Data()
bufferLock.unlock()
log("AudioMixer: Started")
log("AudioMixer: Started (output=\(outputMode))")
}

/// Stop the mixer and flush remaining audio
Expand Down Expand Up @@ -105,12 +115,17 @@ class AudioMixer {
if flush {
// When flushing, process whatever is available
bytesToProcess = max(micBuffer.count, systemBuffer.count)
} else if micBuffer.count >= minBufferBytes && systemBuffer.count >= minBufferBytes {
// Both buffers have data — use shorter to stay in sync
bytesToProcess = (min(micBuffer.count, systemBuffer.count) / 2) * 2
} else if micBuffer.count >= minBufferBytes {
// Only mic has data (system audio disabled/unavailable) — pad system with silence
bytesToProcess = (micBuffer.count / 2) * 2
} else if systemBuffer.count >= minBufferBytes {
// Only system has data — pad mic with silence
bytesToProcess = (systemBuffer.count / 2) * 2
} else {
// Normal operation: process when both have data
let minAvailable = min(micBuffer.count, systemBuffer.count)
guard minAvailable >= minBufferBytes else { return }
// Align to sample boundary (2 bytes per Int16 sample)
bytesToProcess = (minAvailable / 2) * 2
return
}

guard bytesToProcess >= 2 else { return }
Expand All @@ -137,11 +152,17 @@ class AudioMixer {
systemBuffer = Data()
}

// Interleave into stereo
let stereoData = interleave(mic: micData, system: sysData)
// Mix according to output mode
let mixedData: Data
switch outputMode {
case .stereo:
mixedData = interleave(mic: micData, system: sysData)
case .mono:
mixedData = mixToMono(mic: micData, system: sysData)
}

// Send to callback
onStereoChunk?(stereoData)
onStereoChunk?(mixedData)
}

/// Interleave two mono Int16 streams into stereo
Expand Down Expand Up @@ -174,4 +195,32 @@ class AudioMixer {
Data(buffer: buffer)
}
}

/// Average two mono Int16 streams into a single mono stream.
/// Output format: [(mic0+sys0)/2, (mic1+sys1)/2, ...]
/// The streams may differ in length; the shorter one is padded with
/// silence (0) so trailing audio from either source is preserved.
private func mixToMono(mic: Data, system: Data) -> Data {
    // Use the longer stream's length — sizing by mic alone would silently
    // drop trailing system audio when the system buffer is longer.
    let sampleCount = max(mic.count, system.count) / 2

    var monoSamples = [Int16]()
    monoSamples.reserveCapacity(sampleCount)

    mic.withUnsafeBytes { micPtr in
        system.withUnsafeBytes { sysPtr in
            let micSamples = micPtr.bindMemory(to: Int16.self)
            let sysSamples = sysPtr.bindMemory(to: Int16.self)

            for i in 0..<sampleCount {
                // Out-of-range indices read as silence (zero-padding).
                let micSample = Int32(i < micSamples.count ? micSamples[i] : 0)
                let sysSample = Int32(i < sysSamples.count ? sysSamples[i] : 0)
                // Average in Int32 (cannot overflow), then clamp back to Int16 range.
                let mixed = (micSample + sysSample) / 2
                monoSamples.append(Int16(clamping: mixed))
            }
        }
    }

    return monoSamples.withUnsafeBufferPointer { buffer in
        Data(buffer: buffer)
    }
}
}
Loading