Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,37 @@ let package = Package(
)
]
)
```

Follow these steps to try the Spotlight-style transcription overlay (the
`WhisperSpotlight` example) using the optimized **Turbo** model on macOS:

1. Clone the repository and build the XCFramework:

```bash
git clone https://github.com/ggml-org/whisper.cpp.git
cd whisper.cpp
./build-xcframework.sh
```

The script places `whisper.xcframework` inside the `build` folder.

2. Open the `examples/WhisperSpotlight` package in Xcode:

```bash
open examples/WhisperSpotlight/Package.swift
```

3. In Xcode, add the generated `whisper.xcframework` to the
**WhisperSpotlight** target (File → Add Files to “WhisperSpotlight” …).

4. Build and run the app. On first launch it will download the
`ggml-large-v3-turbo.bin` model (~1.6 GB) into
`~/Library/Application Support/WhisperSpotlight/`.

5. Press **Option‑Space** to toggle the overlay. Speak while the window shows
“Listening…”, then press **Option‑Space** again to stop recording. Once
transcription finishes, the recognized text is copied to the clipboard and
briefly shown on screen.


## Voice Activity Detection (VAD)
Support for Voice Activity Detection (VAD) can be enabled using the `--vad`
Expand Down Expand Up @@ -831,6 +861,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
| [WhisperSpotlight](examples/WhisperSpotlight) | | macOS overlay with global hotkey for speech transcription |
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
Expand Down
25 changes: 25 additions & 0 deletions examples/WhisperSpotlight/GlobalHotkey.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import Carbon
import Foundation

/// Registers a system-wide hot key through the Carbon Event Manager and
/// invokes `handler` each time that key combination is pressed.
///
/// The registration and the installed event handler live exactly as long as
/// the instance; both are torn down in `deinit`.
class GlobalHotkey {
    /// Called whenever this instance's hot key is pressed.
    var handler: (() -> Void)?

    /// Handle returned by `RegisterEventHotKey`; needed to unregister.
    private var ref: EventHotKeyRef?
    /// Handle returned by `InstallEventHandler`; needed to remove the callback.
    private var handlerRef: EventHandlerRef?
    /// Identifier this instance registered with, used to filter incoming events.
    private let hotKeyID: EventHotKeyID

    /// - Parameters:
    ///   - keyCode: Virtual key code of the key to listen for.
    ///   - modifiers: Carbon modifier mask that must accompany the key.
    init(keyCode: UInt32, modifiers: UInt32) {
        hotKeyID = EventHotKeyID(signature: OSType(0x1234), id: keyCode)
        RegisterEventHotKey(keyCode, modifiers, hotKeyID, GetApplicationEventTarget(), 0, &ref)

        var eventSpec = EventTypeSpec(
            eventClass: OSType(kEventClassKeyboard),
            eventKind: UInt32(kEventHotKeyPressed)
        )
        // The callback is a C function pointer and cannot capture `self`, so the
        // instance travels through the user-data pointer instead.
        InstallEventHandler(GetApplicationEventTarget(), { _, event, context in
            guard let event, let context else { return OSStatus(eventNotHandledErr) }

            // Read the pressed hot key's ID into a stack value (no heap allocation to leak).
            var pressed = EventHotKeyID()
            let status = GetEventParameter(
                event,
                EventParamName(kEventParamDirectObject),
                EventParamType(typeEventHotKeyID),
                nil,
                MemoryLayout<EventHotKeyID>.size,
                nil,
                &pressed
            )
            guard status == noErr else { return status }

            let hotkey = Unmanaged<GlobalHotkey>.fromOpaque(context).takeUnretainedValue()
            // Every installed handler sees every hot-key event; react only to our own.
            guard pressed.signature == hotkey.hotKeyID.signature,
                  pressed.id == hotkey.hotKeyID.id else {
                return OSStatus(eventNotHandledErr)
            }
            hotkey.handler?()
            return noErr
        }, 1, &eventSpec, Unmanaged.passUnretained(self).toOpaque(), &handlerRef)
    }

    deinit {
        // Remove the handler first: it holds an unretained pointer to `self`
        // that would dangle once this instance is gone.
        if let handlerRef { RemoveEventHandler(handlerRef) }
        if let ref { UnregisterEventHotKey(ref) }
    }
}

/// Carbon modifier mask for the Option (⌥) key, typed as `UInt32` so it can be
/// passed straight to `GlobalHotkey.init(keyCode:modifiers:)`.
///
/// Derived from `optionKeyBit` rather than by shifting `cmdKey`: `cmdKey >> 8`
/// evaluates to 1, which is not the Option mask.
let optionKey: UInt32 = UInt32(1) << UInt32(optionKeyBit)
52 changes: 52 additions & 0 deletions examples/WhisperSpotlight/ModelManager.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import Foundation

/// Locates the Whisper model file on disk and downloads it when it is missing.
struct ModelManager {
    private let fileManager = FileManager.default
    private let modelFile = "ggml-large-v3-turbo.bin"
    private let url = URL(string: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin")!

    /// Returns the location of the model file inside the user's
    /// Application Support directory, under a `WhisperSpotlight` folder.
    ///
    /// Creating the folder is best effort: a failure is ignored here and will
    /// surface later when something tries to write into it.
    func modelPath() -> URL {
        let app = fileManager.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
            .appending(path: "WhisperSpotlight")
        try? fileManager.createDirectory(at: app, withIntermediateDirectories: true)
        return app.appending(path: modelFile)
    }

    /// Makes sure the model file exists, downloading it if necessary.
    ///
    /// - Parameter progress: Optional callback receiving the download fraction (0...1).
    /// - Throws: `URLError(.cannotCreateFile)` when every download attempt fails.
    func ensureModel(progress: ((Double) -> Void)? = nil) async throws {
        let path = modelPath()
        if fileManager.fileExists(atPath: path.path) { return }
        try await downloadModel(to: path, progress: progress)
    }

    /// Downloads the model to `path`, retrying up to three times.
    ///
    /// The download is validated (HTTP status and minimum size) while it is
    /// still in its temporary location and only then moved into place, so a
    /// truncated file never appears at `path` and never blocks a later retry.
    private func downloadModel(to path: URL, progress: ((Double) -> Void)?) async throws {
        let request = URLRequest(url: url)
        // Anything at or below this size is treated as an incomplete download.
        let minimumModelSize: Int64 = 1_400_000_000

        for _ in 0..<3 {
            do {
                let (temp, response) = try await URLSession.shared.download(for: request, delegate: ProgressDelegate(progress))
                // Harmless after a successful move; cleans up rejected downloads.
                defer { try? fileManager.removeItem(at: temp) }

                guard let http = response as? HTTPURLResponse, (200...299).contains(http.statusCode) else {
                    throw URLError(.badServerResponse)
                }
                let attr = try fileManager.attributesOfItem(atPath: temp.path)
                guard let size = attr[.size] as? NSNumber, size.int64Value > minimumModelSize else {
                    throw URLError(.badServerResponse)
                }

                // Clear any stale leftover so the move cannot fail on an existing file.
                try? fileManager.removeItem(at: path)
                try fileManager.moveItem(at: temp, to: path)
                return
            } catch {
                // Retrying after cancellation would only fail again immediately.
                if Task.isCancelled { break }
                continue
            }
        }
        throw URLError(.cannotCreateFile)
    }
}

/// Per-task delegate that forwards transfer progress to a callback.
///
/// Progress is read from the task's `Progress` object via key-value observing,
/// set up in `urlSession(_:didCreateTask:)`. This stays within
/// `URLSessionTaskDelegate`, the only delegate protocol this class adopts;
/// download-delegate methods such as `didWriteData` are not part of that
/// protocol and would not be invoked on this type.
private final class ProgressDelegate: NSObject, URLSessionTaskDelegate {
    /// Receives the completed fraction in the range 0...1. May be called on an
    /// arbitrary thread.
    let callback: ((Double) -> Void)?

    /// Keeps the key-value observation alive for the lifetime of the delegate.
    private var observation: NSKeyValueObservation?

    init(_ cb: ((Double) -> Void)?) { self.callback = cb }

    func urlSession(_ session: URLSession, didCreateTask task: URLSessionTask) {
        guard let callback else { return }
        observation = task.progress.observe(\.fractionCompleted) { progress, _ in
            callback(progress.fractionCompleted)
        }
    }
}
81 changes: 81 additions & 0 deletions examples/WhisperSpotlight/OverlayView.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import SwiftUI
import AppKit

/// The phases the overlay moves through while capturing and transcribing speech.
enum OverlayState {
    /// Nothing in progress; the overlay shows the microphone icon.
    case idle
    /// Audio capture is (being) started; the overlay shows a "Listening…" spinner.
    case listening
    /// Transcription is running; the associated string is the spinner label.
    case transcribing(String)
    /// Transcription finished; the associated string is the text to display.
    case done(String)
}

/// The overlay window content: shows the current `OverlayState` and drives the
/// record → transcribe → copy-to-clipboard flow.
struct OverlayView: View {
    @State private var state: OverlayState = .idle
    @State private var recorder = Recorder()
    @State private var modelURL: URL? = nil
    @State private var manager = ModelManager()
    /// The in-flight "ensure model + start recording" task, kept so it can be cancelled.
    @State private var task: Task<Void, Never>? = nil

    var body: some View {
        VStack {
            switch state {
            case .idle:
                Image(systemName: "mic")
                    .onTapGesture { toggleListening() }
            case .listening:
                ProgressView("Listening…")
                    // Recording starts when the listening UI actually appears.
                    .onAppear { startRecording() }
            case .transcribing(let text):
                ProgressView(text)
            case .done(let text):
                Text(text)
            }
        }
        .frame(width: 320, height: 200)
        .background(Material.thick)
        .cornerRadius(12)
        .onReceive(NotificationCenter.default.publisher(for: .toggleOverlay)) { _ in
            toggleListening()
        }
    }

    /// Starts listening when idle, stops when listening, and ignores the
    /// request while a transcription is running or its result is on screen.
    private func toggleListening() {
        switch state {
        case .idle: state = .listening
        case .listening: stopRecording()
        case .transcribing, .done: break
        }
    }

    /// Ensures the model is available, then starts recording to a temporary WAV file.
    private func startRecording() {
        task = Task {
            do {
                try await manager.ensureModel()
                // The user may have toggled off while the model was downloading;
                // do not start capturing audio in that case.
                try Task.checkCancellation()
                let file = FileManager.default
                    .temporaryDirectory.appending(path: "record.wav")
                try await recorder.startRecording(toOutputFile: file, delegate: nil)
            } catch {
                // Cancellation comes from stopRecording(), which manages the state itself.
                guard !Task.isCancelled else { return }
                // Otherwise leave the "Listening…" spinner instead of showing it forever.
                await MainActor.run { state = .idle }
            }
        }
    }

    /// Stops recording, transcribes the captured file, copies the text to the
    /// clipboard, and returns the overlay to idle two seconds later.
    private func stopRecording() {
        task?.cancel()
        Task {
            // `Recorder` is an actor, so its members must be awaited.
            await recorder.stopRecording()
            if let url = await recorder.currentFile {
                state = .transcribing("Transcribing…")
                // Use the plain file-system path: `path()` percent-encodes by default
                // and would turn "Application Support" into "Application%20Support".
                // NOTE(review): WhisperContext is not defined in the visible sources;
                // if it is an actor, the calls below also need `await` — confirm.
                let ctx = try? WhisperContext.createContext(path: manager.modelPath().path)
                if let data = try? decodeWaveFile(url) {
                    ctx?.fullTranscribe(samples: data, language: "")
                    let text = ctx?.getTranscription() ?? ""
                    NSPasteboard.general.clearContents()
                    NSPasteboard.general.setString(text, forType: .string)
                    state = .done(text)
                }
            }
            DispatchQueue.main.asyncAfter(deadline: .now() + 2) {
                state = .idle
            }
        }
    }
}

extension Notification.Name {
    /// Posted to ask the overlay to toggle between idle and listening.
    static let toggleOverlay = Notification.Name(rawValue: "ToggleOverlay")
}
15 changes: 15 additions & 0 deletions examples/WhisperSpotlight/Package.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// swift-tools-version:5.7
import PackageDescription

// Shared names, so the product, target and test dependency cannot drift apart.
let moduleName = "WhisperSpotlight"
let testsDirectory = "Tests"

// Single-module package: the sources sit next to this manifest (path ""),
// with the test sources in the excluded `Tests` sub-directory.
let package = Package(
    name: moduleName,
    platforms: [
        .macOS(.v13)
    ],
    products: [
        .library(name: moduleName, targets: [moduleName])
    ],
    dependencies: [],
    targets: [
        .target(
            name: moduleName,
            dependencies: [],
            path: "",
            exclude: [testsDirectory]
        ),
        .testTarget(
            name: "WhisperSpotlightTests",
            dependencies: [.byName(name: moduleName)],
            path: testsDirectory
        )
    ]
)
26 changes: 26 additions & 0 deletions examples/WhisperSpotlight/Recorder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import Foundation
import AVFoundation

/// Records microphone audio to a file as 16 kHz mono linear PCM.
actor Recorder {
    private var recorder: AVAudioRecorder?

    /// The file of the most recently *successfully started* recording, or
    /// `nil` if no recording has started or the last attempt failed.
    private(set) var currentFile: URL?

    /// Starts recording into `url`, replacing any recording already in progress.
    ///
    /// - Parameters:
    ///   - url: Destination file.
    ///   - delegate: Optional delegate assigned to the underlying `AVAudioRecorder`.
    /// - Throws: The error from creating the `AVAudioRecorder`, or
    ///   `NSError(domain: "rec", code: 1)` when recording could not be started.
    func startRecording(toOutputFile url: URL, delegate: AVAudioRecorderDelegate?) throws {
        // Finish any previous session and forget its file, so a failed start
        // cannot leave callers holding a stale or never-written URL.
        recorder?.stop()
        recorder = nil
        currentFile = nil

        let settings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVSampleRateKey: 16000.0,
            AVNumberOfChannelsKey: 1,
            // Pin the sample layout that decodeWaveFile(_:) reads:
            // 16-bit signed little-endian integers.
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,
            AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
        ]
        let rec = try AVAudioRecorder(url: url, settings: settings)
        rec.delegate = delegate
        guard rec.record() else { throw NSError(domain: "rec", code: 1) }
        recorder = rec
        currentFile = url
    }

    /// Stops the active recording, if any. `currentFile` is left untouched so
    /// the caller can read the finished file afterwards.
    func stopRecording() {
        recorder?.stop()
        recorder = nil
    }
}
12 changes: 12 additions & 0 deletions examples/WhisperSpotlight/RiffWaveUtils.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import Foundation

/// Decodes the 16-bit little-endian PCM samples of a WAV file into floats in
/// the range -1...1.
///
/// The sample data is located by walking the RIFF chunk list, so files that
/// carry extra chunks before `data` are decoded correctly. Input that is not
/// a RIFF/WAVE container, or has no `data` chunk, is treated as having a
/// fixed 44-byte header. A trailing odd byte is ignored.
///
/// - Parameter url: Location of the WAV file.
/// - Returns: One float per sample; empty when the file holds no sample data.
/// - Throws: Any error raised while reading the file.
func decodeWaveFile(_ url: URL) throws -> [Float] {
    let bytes = [UInt8](try Data(contentsOf: url))
    let payload = wavePayloadRange(in: bytes)

    var samples: [Float] = []
    samples.reserveCapacity(payload.count / 2)

    var index = payload.lowerBound
    // `index + 1 < upperBound` guarantees both bytes of the sample exist.
    while index + 1 < payload.upperBound {
        // Assemble the sample byte by byte: no alignment requirements.
        let bits = UInt16(bytes[index]) | (UInt16(bytes[index + 1]) << 8)
        let sample = Float(Int16(bitPattern: bits)) / 32767.0
        // -32768 / 32767 falls slightly below -1, hence the clamp.
        samples.append(max(-1.0, min(sample, 1.0)))
        index += 2
    }
    return samples
}

/// Returns the byte range of the PCM payload inside a WAV byte buffer.
private func wavePayloadRange(in bytes: [UInt8]) -> Range<Int> {
    let legacyHeaderSize = 44
    let fallback = min(legacyHeaderSize, bytes.count)..<bytes.count

    // Four-character chunk tag at `offset`, or nil when out of bounds / not ASCII.
    func tag(at offset: Int) -> String? {
        guard offset + 4 <= bytes.count else { return nil }
        return String(bytes: bytes[offset..<offset + 4], encoding: .ascii)
    }
    // Little-endian 32-bit unsigned integer at `offset`, or nil when out of bounds.
    func length(at offset: Int) -> Int? {
        guard offset + 4 <= bytes.count else { return nil }
        return Int(bytes[offset])
            | (Int(bytes[offset + 1]) << 8)
            | (Int(bytes[offset + 2]) << 16)
            | (Int(bytes[offset + 3]) << 24)
    }

    guard tag(at: 0) == "RIFF", tag(at: 8) == "WAVE" else { return fallback }

    var offset = 12
    while let id = tag(at: offset), let size = length(at: offset + 4) {
        let start = offset + 8
        if id == "data" {
            let declaredEnd = start + size
            // A zero or oversized length means the header was never finalised;
            // read to the end of the file in that case.
            let end = (size == 0 || declaredEnd > bytes.count) ? bytes.count : declaredEnd
            return start..<end
        }
        // Chunks are padded to an even number of bytes.
        offset = start + size + (size & 1)
    }
    return fallback
}
10 changes: 10 additions & 0 deletions examples/WhisperSpotlight/Tests/IntegrationTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import XCTest
@testable import WhisperSpotlight

/// Smoke tests for the system services the overlay depends on.
final class IntegrationTests: XCTestCase {
    /// A string written to the general pasteboard can be read back unchanged.
    func testClipboardWrite() throws {
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString("hello", forType: .string)

        let roundTripped = pasteboard.string(forType: .string)
        XCTAssertEqual(roundTripped, "hello")
    }
}
10 changes: 10 additions & 0 deletions examples/WhisperSpotlight/Tests/ModelManagerTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import XCTest
@testable import WhisperSpotlight

/// Tests for `ModelManager` path handling.
final class ModelManagerTests: XCTestCase {
    /// The model path lives somewhere under a `WhisperSpotlight` directory.
    func testModelPathCreation() throws {
        let resolvedPath = ModelManager().modelPath().path
        XCTAssertTrue(resolvedPath.contains("WhisperSpotlight"))
    }
}
13 changes: 13 additions & 0 deletions examples/WhisperSpotlight/Tests/RecorderTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import XCTest
@testable import WhisperSpotlight

/// Tests for `Recorder` output.
final class RecorderTests: XCTestCase {
    /// A finished recording starts with the `RIFF` container magic.
    func testWavHeader() async throws {
        let recorder = Recorder()
        let url = FileManager.default.temporaryDirectory.appending(path: "test.wav")
        // Do not leave the recording behind in the temporary directory.
        defer { try? FileManager.default.removeItem(at: url) }

        try await recorder.startRecording(toOutputFile: url, delegate: nil)
        // `Recorder` is an actor, so stopping must be awaited as well.
        await recorder.stopRecording()

        let data = try Data(contentsOf: url)
        XCTAssertEqual(String(data: data.prefix(4), encoding: .ascii), "RIFF")
    }
}
21 changes: 21 additions & 0 deletions examples/WhisperSpotlight/WhisperSpotlightApp.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import Carbon
import SwiftUI

/// Application entry point: a single window group hosting the overlay.
@main
struct WhisperSpotlightApp: App {
    /// Bridges `AppDelegate` into the SwiftUI life cycle.
    @NSApplicationDelegateAdaptor(AppDelegate.self) var delegate

    var body: some Scene {
        WindowGroup(content: { OverlayView() })
    }
}

/// Application delegate that owns the global hot key and turns each press
/// into a `.toggleOverlay` notification.
class AppDelegate: NSObject, NSApplicationDelegate {
    /// Retained for the lifetime of the app; releasing it unregisters the hot key.
    private var hotkey: GlobalHotkey?

    func applicationDidFinishLaunching(_ notification: Notification) {
        // `kVK_Space` is imported as `Int`, while the initializer takes `UInt32`.
        let hotkey = GlobalHotkey(keyCode: UInt32(kVK_Space), modifiers: optionKey)
        hotkey.handler = {
            NotificationCenter.default.post(name: .toggleOverlay, object: nil)
        }
        self.hotkey = hotkey
    }
}
Loading
Loading