Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions desktop/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@
# Production: https://api.omi.me
OMI_API_URL=http://localhost:8080

# Deepgram API key — required for real-time transcription
DEEPGRAM_API_KEY=

# ─── AI (optional) ──────────────────────────────────────────────────
# Gemini API key for proactive assistants and embeddings
# Falls back to backend-side processing if not set
Expand Down
4 changes: 3 additions & 1 deletion desktop/CHANGELOG.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"unreleased": [],
"unreleased": [
"Removed client-side Deepgram API key — transcription now routes securely through the Omi backend"
],
"releases": [
{
"version": "0.11.90",
Expand Down
316 changes: 148 additions & 168 deletions desktop/Desktop/Sources/AppState.swift

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion desktop/Desktop/Sources/Audio/AudioSourceManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,6 @@ final class AudioSourceManager: ObservableObject {
// Start BLE audio processing with direct audio callback and WAL recording
await bleAudioService.startProcessing(
from: connection,
transcriptionService: nil, // We'll handle routing ourselves
audioDataHandler: { [weak self] pcmData in
// Convert decoded PCM mono to stereo and forward
self?.handleBleAudio(pcmData)
Expand Down
38 changes: 7 additions & 31 deletions desktop/Desktop/Sources/Audio/BleAudioService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ final class BleAudioService: ObservableObject {
private var cancellables = Set<AnyCancellable>()

// Audio delivery
private var transcriptionService: TranscriptionService?
private var audioSink: ((Data) -> Void)?
private var audioDataHandler: ((Data) -> Void)?
private var rawFrameHandler: ((Data) -> Void)?

Expand All @@ -44,12 +44,12 @@ final class BleAudioService: ObservableObject {
/// Start processing audio from a device connection
/// - Parameters:
/// - connection: The device connection to get audio from
/// - transcriptionService: Optional transcription service to send audio to
/// - audioSink: Optional closure to receive decoded mono PCM audio (e.g., send to transcription service)
/// - audioDataHandler: Optional handler for decoded PCM data (alternative to transcription)
/// - rawFrameHandler: Optional handler for raw encoded frames (for WAL recording)
func startProcessing(
from connection: DeviceConnection,
transcriptionService: TranscriptionService? = nil,
audioSink: ((Data) -> Void)? = nil,
audioDataHandler: ((Data) -> Void)? = nil,
rawFrameHandler: ((Data) -> Void)? = nil
) async {
Expand All @@ -58,7 +58,7 @@ final class BleAudioService: ObservableObject {
return
}

self.transcriptionService = transcriptionService
self.audioSink = audioSink
self.audioDataHandler = audioDataHandler
self.rawFrameHandler = rawFrameHandler

Expand Down Expand Up @@ -126,7 +126,7 @@ final class BleAudioService: ObservableObject {
cancellables.removeAll()

isProcessing = false
transcriptionService = nil
audioSink = nil
audioDataHandler = nil
rawFrameHandler = nil

Expand Down Expand Up @@ -194,37 +194,13 @@ final class BleAudioService: ObservableObject {
// Calculate audio level
updateAudioLevel(from: pcmData)

// Send to transcription service (mono channel)
if let transcription = transcriptionService {
// TranscriptionService expects stereo (2 channels) for multichannel transcription
// For BLE device audio, we duplicate to both channels (device is the "user")
let stereoData = convertToStereo(pcmData)
transcription.sendAudio(stereoData)
}
// Send decoded mono PCM to audio sink (e.g., transcription service)
audioSink?(pcmData)

// Send to custom handler
audioDataHandler?(pcmData)
}

/// Convert mono 16-bit PCM to stereo by duplicating each sample into both channels.
/// Mono:   [S0, S1, S2, ...]
/// Stereo: [S0, S0, S1, S1, S2, S2, ...] (interleaved)
/// - Parameter monoData: Single-channel Int16 PCM bytes (a trailing odd byte, if any, is ignored).
/// - Returns: Interleaved two-channel Int16 PCM, twice the input sample count.
private func convertToStereo(_ monoData: Data) -> Data {
    let sampleCount = monoData.count / 2
    // Build an Int16 buffer up front instead of appending 2-byte Data slices
    // per sample — avoids repeated Data reallocation and copying in the hot path.
    var stereoSamples = [Int16]()
    stereoSamples.reserveCapacity(sampleCount * 2)

    monoData.withUnsafeBytes { bytes in
        let samples = bytes.bindMemory(to: Int16.self)
        for sample in samples {
            // Write the same sample to both left and right channels.
            stereoSamples.append(sample)
            stereoSamples.append(sample)
        }
    }

    return stereoSamples.withUnsafeBufferPointer { buffer in
        Data(buffer: buffer)
    }
}

/// Calculate RMS audio level from PCM data
private func updateAudioLevel(from data: Data) {
var sumSquares: Float = 0
Expand Down
75 changes: 62 additions & 13 deletions desktop/Desktop/Sources/AudioMixer.swift
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
import Foundation

/// Mixes microphone and system audio into a stereo stream for multichannel transcription
/// Mixes microphone and system audio into a combined stream for transcription.
/// Supports stereo (interleaved mic+system) or mono (averaged) output.
/// In stereo mode: channel 0 (left) = microphone (user),
/// channel 1 (right) = system audio (others). Mono mode averages the two.
class AudioMixer {

// MARK: - Types

/// Callback for receiving stereo audio chunks
enum OutputMode {
case stereo // Interleaved [mic0, sys0, mic1, sys1, ...] — for Deepgram multichannel
case mono // Averaged (mic + system) / 2 — for backend /v4/listen
}

/// Callback for receiving mixed audio chunks
typealias StereoAudioHandler = (Data) -> Void

// MARK: - Properties

private var onStereoChunk: StereoAudioHandler?
private var isRunning = false
private(set) var outputMode: OutputMode = .stereo

// Audio buffers (16kHz mono Int16 PCM)
private var micBuffer = Data()
Expand All @@ -29,15 +36,18 @@ class AudioMixer {
// MARK: - Public Methods

/// Start the mixer
/// - Parameter onStereoChunk: Callback receiving interleaved stereo 16-bit PCM at 16kHz
func start(onStereoChunk: @escaping StereoAudioHandler) {
/// - Parameters:
/// - outputMode: `.stereo` for interleaved multichannel, `.mono` for averaged single-channel
/// - onStereoChunk: Callback receiving mixed 16-bit PCM at 16kHz
func start(outputMode: OutputMode = .stereo, onStereoChunk: @escaping StereoAudioHandler) {
bufferLock.lock()
self.outputMode = outputMode
self.onStereoChunk = onStereoChunk
self.isRunning = true
micBuffer = Data()
systemBuffer = Data()
bufferLock.unlock()
log("AudioMixer: Started")
log("AudioMixer: Started (output=\(outputMode))")
}

/// Stop the mixer and flush remaining audio
Expand Down Expand Up @@ -105,12 +115,17 @@ class AudioMixer {
if flush {
// When flushing, process whatever is available
bytesToProcess = max(micBuffer.count, systemBuffer.count)
} else if micBuffer.count >= minBufferBytes && systemBuffer.count >= minBufferBytes {
// Both buffers have data — use shorter to stay in sync
bytesToProcess = (min(micBuffer.count, systemBuffer.count) / 2) * 2
} else if micBuffer.count >= minBufferBytes {
// Only mic has data (system audio disabled/unavailable) — pad system with silence
bytesToProcess = (micBuffer.count / 2) * 2
} else if systemBuffer.count >= minBufferBytes {
// Only system has data — pad mic with silence
bytesToProcess = (systemBuffer.count / 2) * 2
} else {
// Normal operation: process when both have data
let minAvailable = min(micBuffer.count, systemBuffer.count)
guard minAvailable >= minBufferBytes else { return }
// Align to sample boundary (2 bytes per Int16 sample)
bytesToProcess = (minAvailable / 2) * 2
return
}

guard bytesToProcess >= 2 else { return }
Expand All @@ -137,11 +152,17 @@ class AudioMixer {
systemBuffer = Data()
}

// Interleave into stereo
let stereoData = interleave(mic: micData, system: sysData)
// Mix according to output mode
let mixedData: Data
switch outputMode {
case .stereo:
mixedData = interleave(mic: micData, system: sysData)
case .mono:
mixedData = mixToMono(mic: micData, system: sysData)
}

// Send to callback
onStereoChunk?(stereoData)
onStereoChunk?(mixedData)
}

/// Interleave two mono Int16 streams into stereo
Expand Down Expand Up @@ -174,4 +195,32 @@ class AudioMixer {
Data(buffer: buffer)
}
}

/// Average two mono Int16 streams into a single mono stream.
/// Output format: [(mic0+sys0)/2, (mic1+sys1)/2, ...]
/// The streams may differ in length; the shorter one is padded with
/// silence (0) so trailing audio from either source is preserved.
private func mixToMono(mic: Data, system: Data) -> Data {
    // Use the longer stream's length — sizing by mic alone would silently
    // drop trailing system audio when the system buffer is longer.
    let sampleCount = max(mic.count, system.count) / 2

    var monoSamples = [Int16]()
    monoSamples.reserveCapacity(sampleCount)

    mic.withUnsafeBytes { micPtr in
        system.withUnsafeBytes { sysPtr in
            let micSamples = micPtr.bindMemory(to: Int16.self)
            let sysSamples = sysPtr.bindMemory(to: Int16.self)

            for i in 0..<sampleCount {
                // Out-of-range indices read as silence (zero-padding).
                let micSample = Int32(i < micSamples.count ? micSamples[i] : 0)
                let sysSample = Int32(i < sysSamples.count ? sysSamples[i] : 0)
                // Average in Int32 (cannot overflow), then clamp back to Int16 range.
                let mixed = (micSample + sysSample) / 2
                monoSamples.append(Int16(clamping: mixed))
            }
        }
    }

    return monoSamples.withUnsafeBufferPointer { buffer in
        Data(buffer: buffer)
    }
}
}
Loading