Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
7606f35
WIP: Added load voices request
michalrentka Jan 14, 2026
ffbea9f
WIP
michalrentka Jan 16, 2026
6281f7f
Finished speech manager refactoring, api changed, storage changes
michalrentka Jan 20, 2026
858f6e8
UI Refactoring, bug fixes
michalrentka Jan 22, 2026
2005a08
More refactoring, implement remote voices settings
michalrentka Jan 23, 2026
a376e8d
Cleanup & refactoring, added remote voice samples
michalrentka Feb 5, 2026
7b17d05
Implementing remote voice processor
michalrentka Feb 5, 2026
2334423
Implemented remote voice speech processor missing functionality
michalrentka Feb 6, 2026
3eb2e77
Implemented text highlighting
michalrentka Feb 10, 2026
4d7a285
Improved text tokenizer to split sentences properly, added a limit
michalrentka Feb 10, 2026
d2840bc
Refactoring
michalrentka Feb 10, 2026
49888ba
Updated pdf worker
michalrentka Feb 10, 2026
04363cc
Fixed highlighting in document controller to highlight whole lines in…
michalrentka Feb 10, 2026
00dc774
Re-layout speech highlight properly
michalrentka Feb 10, 2026
6ff0aeb
Added remaining time countdown to remote voices
michalrentka Feb 11, 2026
26212ef
Implement out of credits state, show low credits visually, added supp…
michalrentka Feb 12, 2026
c109bb0
Improved speech audio caching, cache 2 sentences ahead for fluent pla…
michalrentka Feb 12, 2026
845008c
Fix switching voices
michalrentka Feb 13, 2026
624d193
API changes implemented
michalrentka Feb 13, 2026
2f1d471
Cleanup
michalrentka Feb 13, 2026
4af9cc0
Added basic key commands, backward / forward while paused WIP
michalrentka Feb 13, 2026
8f4498a
Updated pdf worker
michalrentka Feb 16, 2026
69cdc52
Fixes paused forward/backward
michalrentka Feb 16, 2026
91f77e4
Support for media controls, fixed voice switching bug
michalrentka Feb 16, 2026
56e24d2
Added debounce to forward/backward to limit audio requests
michalrentka Feb 16, 2026
30ba09b
Highlight whole paragraph when speaking a sentence.
michalrentka Feb 17, 2026
47f96f4
Fixed pspdfkit uimenu creation, show speech controls if speech is sta…
michalrentka Feb 18, 2026
ecce83f
Fixed mapping selected text in document to extracted text
michalrentka Feb 18, 2026
2efe3af
Refactoring
michalrentka Feb 18, 2026
c822165
Bug fixes
michalrentka Feb 18, 2026
0798367
New API changes implemented
michalrentka Feb 19, 2026
bb9b28f
Voice picker improvements, added advanced / basic / local, added rema…
michalrentka Feb 19, 2026
1a1eca5
Further voice and language picker improvements, added language variat…
michalrentka Feb 19, 2026
f0e0ab7
Properly utilise voice tiers when detecting language in speech manager
michalrentka Feb 19, 2026
cee52d1
Improved language detection
michalrentka Feb 19, 2026
181ac17
API credits/tiers changes implemented
michalrentka Feb 20, 2026
4be5540
Store separate remote voice for separate tiers
michalrentka Feb 20, 2026
1979bc1
Refactoring & improvements
michalrentka Feb 20, 2026
216abae
Renamed tiers
michalrentka Feb 23, 2026
474a28f
Distinguish "free" voices by tier not by creditsPerMinute
michalrentka Feb 23, 2026
e296c7b
Implemented ogg converter for remote voices
michalrentka Feb 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 74 additions & 3 deletions Zotero.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Zotero/Assets/en.lproj/Localizable.strings
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,10 @@
"accessibility.speech.forward" = "Skip forward";
"accessibility.speech.backward" = "Skip backward";
"accessibility.speech.title" = "Read Aloud";
"accessibility.speech.unknown_voice" = "Voice";
"accessibility.speech.speak" = "Speak";
"accessibility.speech.automatic" = "Automatic";
"accessibility.speech.no_voices_for_tier" = "No voices found for detected language %@ in this tier. Pick a different tier or choose a language manually.";
"accessibility.show_reader" = "Reading Mode";
"accessibility.show_reader_accessibility_label" = "Switch to reading mode";
"accessibility.show_speech" = "Read Aloud";
Expand Down
33 changes: 33 additions & 0 deletions Zotero/Controllers/API/Requests/CreditsRequest.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//
// CreditsRequest.swift
// Zotero
//
// Created by Michal Rentka on 13.02.2026.
// Copyright © 2026 Corporation for Digital Scholarship. All rights reserved.
//

import Foundation

/// GET request against the `tts/credits` endpoint whose response is decoded as `CreditsResponse`.
/// Sends no parameters and no extra headers.
struct CreditsRequest: ApiResponseRequest {
    typealias Response = CreditsResponse

    /// Zotero API path of the credits resource.
    var endpoint: ApiEndpoint { .zotero(path: "tts/credits") }

    var httpMethod: ApiHttpMethod { .get }

    var encoding: ApiParameterEncoding { .url }

    /// This request carries no parameters.
    var parameters: [String: Any]? { nil }

    /// This request adds no custom headers.
    var headers: [String: String]? { nil }
}
58 changes: 58 additions & 0 deletions Zotero/Controllers/API/Requests/ReadAloudAudioRequest.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//
// ReadAloudAudioRequest.swift
// Zotero
//
// Created by Michal Rentka on 15.01.2026.
// Copyright © 2026 Corporation for Digital Scholarship. All rights reserved.
//

/// GET request against the `tts/speak` endpoint.
/// The text, voice identifier and language are sent as URL-encoded parameters.
struct ReadAloudAudioRequest: ApiRequest {
    /// Sent as the `voice` parameter.
    let voiceId: String
    /// Sent as the `text` parameter.
    let text: String
    /// Sent as the `lang` parameter.
    let language: String

    var endpoint: ApiEndpoint { .zotero(path: "tts/speak") }

    var httpMethod: ApiHttpMethod { .get }

    var encoding: ApiParameterEncoding { .url }

    var parameters: [String: Any]? {
        var query: [String: Any] = [:]
        query["text"] = text
        query["voice"] = voiceId
        query["lang"] = language
        return query
    }

    /// This request adds no custom headers.
    var headers: [String: String]? { nil }
}

/// GET request against the `tts/sample` endpoint.
/// The voice identifier and language are sent as URL-encoded parameters.
struct ReadAloudSampleRequest: ApiRequest {
    /// Sent as the `voice` parameter.
    let voiceId: String
    /// Sent as the `lang` parameter.
    let language: String

    var endpoint: ApiEndpoint { .zotero(path: "tts/sample") }

    var httpMethod: ApiHttpMethod { .get }

    var encoding: ApiParameterEncoding { .url }

    var parameters: [String: Any]? {
        var query: [String: Any] = [:]
        query["voice"] = voiceId
        query["lang"] = language
        return query
    }

    /// This request adds no custom headers.
    var headers: [String: String]? { nil }
}
33 changes: 33 additions & 0 deletions Zotero/Controllers/API/Requests/VoicesRequest.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//
// VoicesRequest.swift
// Zotero
//
// Created by Michal Rentka on 14.01.2026.
// Copyright © 2026 Corporation for Digital Scholarship. All rights reserved.
//

import Foundation

/// GET request against the `tts/voices` endpoint whose response is decoded as an array of `RemoteVoice`.
/// Sends no parameters and no extra headers.
struct VoicesRequest: ApiResponseRequest {
    typealias Response = [RemoteVoice]

    /// Zotero API path of the voices resource.
    var endpoint: ApiEndpoint { .zotero(path: "tts/voices") }

    var httpMethod: ApiHttpMethod { .get }

    var encoding: ApiParameterEncoding { .url }

    /// This request carries no parameters.
    var parameters: [String: Any]? { nil }

    /// This request adds no custom headers.
    var headers: [String: String]? { nil }
}
14 changes: 14 additions & 0 deletions Zotero/Controllers/API/Responses/CreditsResponse.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//
// CreditsResponse.swift
// Zotero
//
// Created by Michal Rentka on 13.02.2026.
// Copyright © 2026 Corporation for Digital Scholarship. All rights reserved.
//

import Foundation

/// Decoded payload of `CreditsRequest`.
struct CreditsResponse: Decodable {
    /// Value of the `standardCreditsRemaining` key.
    let standardCreditsRemaining: Int
    /// Value of the `premiumCreditsRemaining` key.
    let premiumCreditsRemaining: Int

    /// Keys match the property names exactly, i.e. the same keys the compiler would synthesize.
    private enum CodingKeys: String, CodingKey {
        case standardCreditsRemaining
        case premiumCreditsRemaining
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ struct ReadSpeechLanguageDbRequest: DbResponseRequest {
var needsWrite: Bool { return false }

/// Reads the speech language stored on the item matching `key` in `libraryId`.
/// - Parameter database: Realm database to query.
/// - Returns: The item's `speechLanguage`, or `nil` when no item matches, no language is stored,
///   or the stored language is an empty string.
func process(in database: Realm) throws -> String? {
    // An empty string is treated the same as "no language stored" so callers only have to handle `nil`.
    guard let language = database.objects(RItem.self).filter(.key(key, in: libraryId)).first?.speechLanguage, !language.isEmpty else {
        return nil
    }
    return language
}
}
74 changes: 74 additions & 0 deletions Zotero/Controllers/LanguageDetector.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
//
// LanguageDetector.swift
// Zotero
//
// Created by Michal Rentka on 19.02.2026.
// Copyright © 2026 Corporation for Digital Scholarship. All rights reserved.
//

import AVFAudio
import NaturalLanguage

/// Detects language with regional variation from text.
/// `NLLanguageRecognizer` only reports a base language (e.g. "en") or a language with script (e.g. "zh-Hans"),
/// not a regional variation (e.g. "en-US"). This utility resolves a regional variation using the device locale
/// and a table of prominent variation defaults.
enum LanguageDetector {
    /// Locale returned when none of the available locales matches the requested language.
    private static let fallbackLocale = "en-US"

    /// Default/prominent variations for languages that have multiple regional variants.
    /// Keys are either a bare language code or a language-script identifier; the more specific key is tried first.
    private static let prominentVariations: [String: String] = [
        "en": "en-US",
        "es": "es-ES",
        "pt": "pt-BR",
        "zh": "zh-CN",
        "zh-Hans": "zh-CN",
        "zh-Hant": "zh-TW",
        "fr": "fr-FR",
        "de": "de-DE",
        "ar": "ar-SA",
        "nl": "nl-NL"
    ]

    /// Detects language with variation from the given text.
    /// - Parameter text: The text to analyze
    /// - Returns: A locale string with variation (e.g., "en-US"). "en" is used as the base language when
    ///   the recognizer reports no dominant language.
    static func detectLanguage(from text: String) -> String {
        let recognizer = NLLanguageRecognizer()
        recognizer.processString(text)
        let baseLanguage = recognizer.dominantLanguage?.rawValue ?? "en"
        return resolveVariation(for: baseLanguage)
    }

    /// Resolves a base language to a specific variation.
    ///
    /// Resolution order:
    /// 1. `baseLanguage` itself, when it is already one of the available locales.
    /// 2. The device locale's language-region pair, when the device language matches and that pair is available.
    /// 3. The prominent variation for the language (script-specific entry first), when available.
    /// 4. The first available locale for the language.
    ///
    /// Locales are matched on their language subtag rather than by string prefix, so "fi" does not match
    /// "fil-PH" and "zh-Hans" matches "zh-CN".
    /// - Parameters:
    ///   - baseLanguage: The base language code (e.g., "en" or "zh-Hans")
    ///   - availableLocales: Locales to choose from. Defaults to the languages of all system speech voices.
    ///   - deviceLocale: Locale identifier of the device. Defaults to `Locale.current.identifier`.
    /// - Returns: A locale string with variation (e.g., "en-US"), or "en-US" when no available locale matches.
    static func resolveVariation(
        for baseLanguage: String,
        availableLocales: [String] = AVSpeechSynthesisVoice.speechVoices().map { $0.language },
        deviceLocale: String = Locale.current.identifier
    ) -> String {
        let requested = baseLanguage.replacingOccurrences(of: "_", with: "-")
        let languageCode = subtags(of: requested).language
        guard !languageCode.isEmpty else { return fallbackLocale }

        let candidates = availableLocales.filter { subtags(of: $0).language == languageCode }
        guard !candidates.isEmpty else { return fallbackLocale }

        // A fully specified locale that is available needs no further resolution.
        if candidates.contains(requested) {
            return requested
        }

        // Prefer the user's own region when the device language is the requested language.
        let device = subtags(of: deviceLocale)
        if device.language == languageCode, let region = device.region {
            let deviceVariation = "\(languageCode)-\(region)"
            if candidates.contains(deviceVariation) {
                return deviceVariation
            }
        }

        // Script-specific default first (e.g. "zh-Hant" -> "zh-TW"), then the plain language default.
        for key in [requested, languageCode] {
            if let prominent = prominentVariations[key], candidates.contains(prominent) {
                return prominent
            }
        }

        return candidates[0]
    }

    /// Splits a locale identifier into its language and region subtags.
    /// Accepts both "-" and "_" separators, ignores anything after "@" and skips script subtags.
    /// - Parameter identifier: Locale identifier such as "en-US", "zh_Hans_CN" or "en_US@calendar=gregorian".
    /// - Returns: Lowercased language subtag (empty when the identifier has none) and uppercased region, if any.
    private static func subtags(of identifier: String) -> (language: String, region: String?) {
        let tags = identifier
            .prefix(while: { $0 != "@" })
            .split(whereSeparator: { $0 == "-" || $0 == "_" })
        guard let language = tags.first else { return ("", nil) }
        // A region is either two letters ("US") or three digits ("419"); four-letter subtags are scripts.
        let region = tags.dropFirst().first(where: { tag in
            let isAlphaRegion = tag.count == 2 && tag.allSatisfy({ $0.isLetter })
            let isNumericRegion = tag.count == 3 && tag.allSatisfy({ $0.isNumber })
            return isAlphaRegion || isNumericRegion
        })
        return (language.lowercased(), region?.uppercased())
    }
}
Loading
Loading