Skip to content

Commit c8a65b5

Browse files
authored
Replace local whisper.cpp with Groq cloud API for transcription (#2)
* Replace local whisper.cpp with Groq cloud API for transcription

  Switch from local whisper.cpp speech-to-text to Groq's cloud Whisper Large V3 API for significantly better accuracy and simpler builds.

  - Add GroqEngine with multipart WAV upload to Groq transcription endpoint
  - Add secure file-based API key storage in Application Support
  - Update onboarding to require API key setup (mic + accessibility + key)
  - Update settings with Groq API key field and model picker
  - Remove whisper.cpp submodule, WhisperWrapper, build scripts, bridging header
  - Simplify Package.swift and Makefile (no more cmake/dylib handling)
  - Update README with Groq setup docs, privacy changes, and link to v1.1.1 for local-only version
  - Prevent onboarding window from closing without completing all setup steps

* Update CI workflows and swiftlint config for Groq-only build

  - Remove whisper.cpp, cmake, dylib references from CI and release workflows
  - Remove WhisperWrapper from swiftlint included paths
  - Update release notes template to mention Groq and link to v1.1.1 for local version

* Replace hardcoded Groq rate limits with links to their docs

  Rate limits and pricing may change — link to the source of truth instead of hardcoding values that can go stale.
1 parent 599d6e2 commit c8a65b5

21 files changed

+810
-1427
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
run: brew install swiftlint
2929

3030
- name: Run SwiftLint
31-
run: swiftlint lint AudioType WhisperWrapper
31+
run: swiftlint lint AudioType
3232

3333
build:
3434
name: Build
@@ -38,28 +38,12 @@ jobs:
3838
steps:
3939
- name: Checkout repository
4040
uses: actions/checkout@v4
41-
with:
42-
submodules: recursive
4341

4442
- name: Setup Xcode
4543
uses: maxim-lobanov/setup-xcode@v1
4644
with:
4745
xcode-version: latest-stable
4846

49-
- name: Install dependencies
50-
run: brew install cmake
51-
52-
- name: Build whisper.cpp
53-
run: |
54-
cd whisper.cpp
55-
mkdir -p build
56-
cd build
57-
cmake .. \
58-
-DCMAKE_BUILD_TYPE=Release \
59-
-DGGML_METAL=ON \
60-
-DWHISPER_BUILD_EXAMPLES=ON
61-
cmake --build . --config Release -j$(sysctl -n hw.ncpu)
62-
6347
- name: Build AudioType (Debug)
6448
run: swift build
6549

@@ -73,16 +57,11 @@ jobs:
7357
run: |
7458
# Check that all required files exist
7559
test -f AudioType.app/Contents/MacOS/AudioType
76-
test -f AudioType.app/Contents/MacOS/whisper-cli
77-
test -f AudioType.app/Contents/MacOS/libwhisper.1.dylib
7860
test -f AudioType.app/Contents/Info.plist
7961
8062
# Verify code signature
8163
codesign -v AudioType.app
8264
83-
# Verify whisper-cli can find its libraries
84-
otool -L AudioType.app/Contents/MacOS/whisper-cli | grep -q "@executable_path"
85-
8665
echo "App bundle verification passed!"
8766
8867
test:
@@ -93,8 +72,6 @@ jobs:
9372
steps:
9473
- name: Checkout repository
9574
uses: actions/checkout@v4
96-
with:
97-
submodules: recursive
9875

9976
- name: Setup Xcode
10077
uses: maxim-lobanov/setup-xcode@v1

.github/workflows/release.yml

Lines changed: 25 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ on:
77
workflow_dispatch:
88
inputs:
99
version:
10-
description: 'Version to release (e.g., 1.0.0)'
10+
description: 'Version to release (e.g., 2.0.0)'
1111
required: true
12-
default: '1.0.0'
12+
default: '2.0.0'
1313

1414
permissions:
1515
contents: write
@@ -32,7 +32,7 @@ jobs:
3232
run: brew install swiftlint
3333

3434
- name: Run SwiftLint
35-
run: swiftlint lint --strict AudioType WhisperWrapper
35+
run: swiftlint lint --strict AudioType
3636

3737
build:
3838
runs-on: macos-14
@@ -41,32 +41,14 @@ jobs:
4141
steps:
4242
- name: Checkout repository
4343
uses: actions/checkout@v4
44-
with:
45-
submodules: recursive
4644

4745
- name: Setup Xcode
4846
uses: maxim-lobanov/setup-xcode@v1
4947
with:
5048
xcode-version: latest-stable
5149

52-
- name: Install dependencies
53-
run: |
54-
brew install cmake
55-
56-
- name: Build whisper.cpp
57-
run: |
58-
cd whisper.cpp
59-
mkdir -p build
60-
cd build
61-
cmake .. \
62-
-DCMAKE_BUILD_TYPE=Release \
63-
-DGGML_METAL=ON \
64-
-DWHISPER_BUILD_EXAMPLES=ON
65-
cmake --build . --config Release -j$(sysctl -n hw.ncpu)
66-
6750
- name: Build AudioType
68-
run: |
69-
swift build -c release
51+
run: swift build -c release
7052

7153
- name: Create app bundle
7254
run: |
@@ -75,55 +57,29 @@ jobs:
7557
# Copy main binary
7658
cp .build/release/AudioType AudioType.app/Contents/MacOS/
7759
78-
# Copy whisper-cli and dylibs
79-
cp whisper.cpp/build/bin/whisper-cli AudioType.app/Contents/MacOS/
80-
cp whisper.cpp/build/src/libwhisper.1.dylib AudioType.app/Contents/MacOS/
81-
cp whisper.cpp/build/ggml/src/libggml.0.dylib AudioType.app/Contents/MacOS/
82-
cp whisper.cpp/build/ggml/src/libggml-base.0.dylib AudioType.app/Contents/MacOS/
83-
cp whisper.cpp/build/ggml/src/libggml-cpu.0.dylib AudioType.app/Contents/MacOS/
84-
cp whisper.cpp/build/ggml/src/ggml-blas/libggml-blas.0.dylib AudioType.app/Contents/MacOS/
85-
cp whisper.cpp/build/ggml/src/ggml-metal/libggml-metal.0.dylib AudioType.app/Contents/MacOS/
86-
87-
# Copy Metal shaders
88-
cp whisper.cpp/build/bin/ggml-metal.metal AudioType.app/Contents/MacOS/ 2>/dev/null || true
89-
cp whisper.cpp/build/bin/ggml-common.h AudioType.app/Contents/MacOS/ 2>/dev/null || true
90-
9160
# Copy Info.plist and icon
9261
cp Resources/Info.plist AudioType.app/Contents/Info.plist
9362
cp Resources/AppIcon.icns AudioType.app/Contents/Resources/
9463
95-
# Fix library paths
96-
install_name_tool -change @rpath/libwhisper.1.dylib @executable_path/libwhisper.1.dylib AudioType.app/Contents/MacOS/whisper-cli
97-
install_name_tool -change @rpath/libggml.0.dylib @executable_path/libggml.0.dylib AudioType.app/Contents/MacOS/whisper-cli
98-
install_name_tool -change @rpath/libggml-cpu.0.dylib @executable_path/libggml-cpu.0.dylib AudioType.app/Contents/MacOS/whisper-cli
99-
install_name_tool -change @rpath/libggml-blas.0.dylib @executable_path/libggml-blas.0.dylib AudioType.app/Contents/MacOS/whisper-cli
100-
install_name_tool -change @rpath/libggml-metal.0.dylib @executable_path/libggml-metal.0.dylib AudioType.app/Contents/MacOS/whisper-cli
101-
install_name_tool -change @rpath/libggml-base.0.dylib @executable_path/libggml-base.0.dylib AudioType.app/Contents/MacOS/whisper-cli
102-
103-
# Fix dylib inter-dependencies
104-
install_name_tool -change @rpath/libggml-base.0.dylib @executable_path/libggml-base.0.dylib AudioType.app/Contents/MacOS/libwhisper.1.dylib
105-
install_name_tool -change @rpath/libggml.0.dylib @executable_path/libggml.0.dylib AudioType.app/Contents/MacOS/libwhisper.1.dylib
106-
install_name_tool -change @rpath/libggml-base.0.dylib @executable_path/libggml-base.0.dylib AudioType.app/Contents/MacOS/libggml.0.dylib
107-
install_name_tool -change @rpath/libggml-base.0.dylib @executable_path/libggml-base.0.dylib AudioType.app/Contents/MacOS/libggml-cpu.0.dylib
108-
install_name_tool -change @rpath/libggml-base.0.dylib @executable_path/libggml-base.0.dylib AudioType.app/Contents/MacOS/libggml-blas.0.dylib
109-
install_name_tool -change @rpath/libggml-base.0.dylib @executable_path/libggml-base.0.dylib AudioType.app/Contents/MacOS/libggml-metal.0.dylib
110-
11164
# Sign the app
11265
codesign --force --deep --sign - AudioType.app
11366
67+
- name: Verify app bundle
68+
run: |
69+
test -f AudioType.app/Contents/MacOS/AudioType
70+
test -f AudioType.app/Contents/Info.plist
71+
codesign -v AudioType.app
72+
echo "App bundle verification passed!"
73+
11474
- name: Create DMG
11575
run: |
116-
# Create a temporary directory for DMG contents
11776
mkdir -p dmg_contents
11877
cp -R AudioType.app dmg_contents/
11978
ln -s /Applications dmg_contents/Applications
120-
121-
# Create DMG
12279
hdiutil create -volname "AudioType" -srcfolder dmg_contents -ov -format UDZO AudioType.dmg
12380
12481
- name: Create ZIP
125-
run: |
126-
zip -r AudioType.zip AudioType.app
82+
run: zip -r AudioType.zip AudioType.app
12783

12884
- name: Get version
12985
id: version
@@ -142,17 +98,27 @@ jobs:
14298
body: |
14399
## AudioType v${{ steps.version.outputs.version }}
144100
145-
Voice-to-text for macOS using Whisper AI.
101+
Voice-to-text for macOS powered by Groq cloud transcription (Whisper Large V3).
102+
103+
### What's New
104+
- Cloud-powered transcription via Groq API for significantly better accuracy
105+
- Self-serve: bring your own free Groq API key
106+
- Simplified build — no more whisper.cpp compilation required
146107
147108
### Installation
148109
1. Download `AudioType.dmg` or `AudioType.zip`
149110
2. Extract and move `AudioType.app` to your Applications folder
150111
3. Open the app and grant Microphone and Accessibility permissions
151-
4. Hold the fn key (or your configured hotkey) to dictate
112+
4. Enter your free Groq API key (get one at https://console.groq.com/keys)
113+
5. Hold the fn key to dictate
152114
153115
### Requirements
154-
- macOS 13.0 or later
116+
- macOS 14.0 or later
155117
- Apple Silicon or Intel Mac
118+
- Internet connection
119+
- Free Groq API key
120+
121+
> Looking for the offline/local version? See [v1.1.1](https://github.com/PatelUtkarsh/audio-type/releases/tag/v1.1.1)
156122
files: |
157123
AudioType.dmg
158124
AudioType.zip

.gitignore

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,6 @@ build/
3030
*.dylib
3131
*.o
3232

33-
# Models (downloaded separately)
34-
Models/*.bin
35-
Models/*.mlmodelc/
36-
*.ggml
37-
ggml-*.bin
38-
39-
# whisper.cpp build artifacts
40-
whisper.cpp/build/
41-
4233
# macOS
4334
.DS_Store
4435
.AppleDouble

.gitmodules

Lines changed: 0 additions & 3 deletions
This file was deleted.

.swiftlint.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22

33
included:
44
- AudioType
5-
- WhisperWrapper
65

76
excluded:
8-
- whisper.cpp
97
- .build
108
- AudioType.app
119

AudioType/App/AudioTypeApp.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,21 @@ class AppDelegate: NSObject, NSApplicationDelegate {
4444
let micPermission = await Permissions.checkMicrophone()
4545
let accessibilityPermission = Permissions.checkAccessibility()
4646

47-
if !micPermission || !accessibilityPermission {
47+
if !micPermission || !accessibilityPermission || !GroqEngine.isConfigured {
4848
// Show onboarding window
4949
DispatchQueue.main.async {
5050
self.showOnboarding()
5151
}
5252
} else {
53-
// Load model and start listening for hotkey
53+
// All set — start listening for hotkey
5454
await transcriptionManager.initialize()
5555
}
5656
}
5757

5858
private func showOnboarding() {
5959
let window = NSWindow(
60-
contentRect: NSRect(x: 0, y: 0, width: 450, height: 350),
61-
styleMask: [.titled, .closable],
60+
contentRect: NSRect(x: 0, y: 0, width: 450, height: 480),
61+
styleMask: [.titled],
6262
backing: .buffered,
6363
defer: false
6464
)

AudioType/App/TranscriptionManager.swift

Lines changed: 32 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -26,37 +26,14 @@ class TranscriptionManager: ObservableObject {
2626
@Published private(set) var state: TranscriptionState = .idle
2727
@Published private(set) var isInitialized = false
2828

29-
private var whisperEngine: WhisperEngine?
29+
private var groqEngine: GroqEngine?
3030
private var audioRecorder: AudioRecorder?
3131
private var hotKeyManager: HotKeyManager?
3232
private var textInserter: TextInserter?
3333

3434
private let logger = Logger(subsystem: "com.audiotype", category: "TranscriptionManager")
3535

36-
private init() {
37-
// Listen for model changes
38-
NotificationCenter.default.addObserver(
39-
self,
40-
selector: #selector(handleModelChanged),
41-
name: .modelChanged,
42-
object: nil
43-
)
44-
}
45-
46-
@objc private func handleModelChanged(_ notification: Notification) {
47-
guard let model = notification.object as? WhisperModel else { return }
48-
logger.info("Model changed to: \(model.displayName)")
49-
50-
// Reload whisper engine with new model
51-
Task {
52-
do {
53-
whisperEngine = try await WhisperEngine.load(model: model)
54-
logger.info("Whisper engine reloaded with \(model.displayName)")
55-
} catch {
56-
logger.error("Failed to reload whisper engine: \(error.localizedDescription)")
57-
}
58-
}
59-
}
36+
private init() {}
6037

6138
func initialize() async {
6239
logger.info("Initializing TranscriptionManager...")
@@ -65,14 +42,14 @@ class TranscriptionManager: ObservableObject {
6542
audioRecorder = AudioRecorder()
6643
textInserter = TextInserter()
6744

68-
// Load whisper model
69-
do {
70-
whisperEngine = try await WhisperEngine.load()
71-
logger.info("Whisper model loaded successfully")
72-
} catch {
73-
logger.error("Failed to load whisper model: \(error.localizedDescription)")
74-
setState(.error("Failed to load model: \(error.localizedDescription)"))
75-
return
45+
// Initialize Groq engine (lightweight — no model download needed)
46+
groqEngine = GroqEngine()
47+
48+
if !GroqEngine.isConfigured {
49+
logger.warning("Groq API key not configured")
50+
setState(.error("API key required — open Settings"))
51+
} else {
52+
logger.info("Groq engine ready")
7653
}
7754

7855
// Start hotkey listener
@@ -84,16 +61,28 @@ class TranscriptionManager: ObservableObject {
8461
hotKeyManager?.startListening()
8562

8663
isInitialized = true
87-
setState(.idle)
64+
if GroqEngine.isConfigured {
65+
setState(.idle)
66+
}
8867
logger.info("TranscriptionManager initialized successfully")
8968
}
9069

9170
func cleanup() {
9271
hotKeyManager?.stopListening()
93-
whisperEngine = nil
72+
groqEngine = nil
9473
audioRecorder = nil
9574
}
9675

76+
/// Called when the user saves an API key — re-validate and clear error state.
77+
func onApiKeyChanged() {
78+
if GroqEngine.isConfigured {
79+
setState(.idle)
80+
logger.info("API key configured, engine ready")
81+
} else {
82+
setState(.error("API key required — open Settings"))
83+
}
84+
}
85+
9786
private func handleHotKeyEvent(_ event: HotKeyEvent) {
9887
switch event {
9988
case .keyDown:
@@ -109,6 +98,11 @@ class TranscriptionManager: ObservableObject {
10998
return
11099
}
111100

101+
guard GroqEngine.isConfigured else {
102+
setState(.error("API key required — open Settings"))
103+
return
104+
}
105+
112106
do {
113107
try audioRecorder?.startRecording()
114108
setState(.recording)
@@ -141,17 +135,17 @@ class TranscriptionManager: ObservableObject {
141135
}
142136

143137
private func transcribeAndInsert(samples: [Float]) async {
144-
guard let whisperEngine = whisperEngine else {
138+
guard let groqEngine = groqEngine else {
145139
await MainActor.run {
146-
self.setState(.error("Whisper engine not initialized"))
140+
self.setState(.error("Groq engine not initialized"))
147141
}
148142
return
149143
}
150144

151145
let startTime = CFAbsoluteTimeGetCurrent()
152146

153147
do {
154-
let text = try whisperEngine.transcribe(samples: samples)
148+
let text = try await groqEngine.transcribe(samples: samples)
155149
let elapsed = CFAbsoluteTimeGetCurrent() - startTime
156150
logger.info("Transcription completed in \(elapsed, format: .fixed(precision: 2))s: \(text)")
157151

0 commit comments

Comments
 (0)