Skip to content

Commit 23da4ab

Browse files
committed
big time
1 parent c63e1b5 commit 23da4ab

File tree

12 files changed

+407
-644
lines changed

12 files changed

+407
-644
lines changed

config/constants.py

Lines changed: 90 additions & 205 deletions
Original file line numberDiff line numberDiff line change
@@ -1,220 +1,105 @@
1-
"""Model definitions and application constants."""
1+
from collections import OrderedDict
2+
3+
_MODEL_SPECS = [
4+
("Whisper large-v3 turbo", "whisper-large-v3-turbo", "float32", 155, 4, "3.2 GB"),
5+
("Whisper large-v3 turbo", "whisper-large-v3-turbo", "bfloat16", 160, 4, "3.0 GB"),
6+
("Whisper large-v3 turbo", "whisper-large-v3-turbo", "float16", 165, 4, "2.8 GB"),
7+
("Distil Whisper large-v3", "distil-whisper-large-v3", "float32", 160, 4, "3.0 GB"),
8+
("Distil Whisper large-v3", "distil-whisper-large-v3", "bfloat16", 160, 4, "3.0 GB"),
9+
("Distil Whisper large-v3", "distil-whisper-large-v3", "float16", 160, 4, "3.0 GB"),
10+
("Whisper large-v3", "whisper-large-v3", "float32", 85, 2, "5.5 GB"),
11+
("Whisper large-v3", "whisper-large-v3", "bfloat16", 95, 3, "3.8 GB"),
12+
("Whisper large-v3", "whisper-large-v3", "float16", 100, 3, "3.3 GB"),
13+
("Distil Whisper medium.en", "distil-whisper-medium.en", "float32", 160, 4, "3.0 GB"),
14+
("Distil Whisper medium.en", "distil-whisper-medium.en", "bfloat16", 160, 4, "3.0 GB"),
15+
("Distil Whisper medium.en", "distil-whisper-medium.en", "float16", 160, 4, "3.0 GB"),
16+
("Whisper medium", "whisper-medium", "float32", 125, 5, "2.8 GB"),
17+
("Whisper medium", "whisper-medium", "bfloat16", 135, 6, "2.2 GB"),
18+
("Whisper medium", "whisper-medium", "float16", 140, 6, "2.0 GB"),
19+
("Whisper medium.en", "whisper-medium.en", "float32", 130, 6, "2.5 GB"),
20+
("Whisper medium.en", "whisper-medium.en", "bfloat16", 140, 7, "2.0 GB"),
21+
("Whisper medium.en", "whisper-medium.en", "float16", 145, 7, "1.8 GB"),
22+
("Distil Whisper small.en", "distil-whisper-small.en", "float32", 160, 4, "3.0 GB"),
23+
("Distil Whisper small.en", "distil-whisper-small.en", "bfloat16", 160, 4, "3.0 GB"),
24+
("Distil Whisper small.en", "distil-whisper-small.en", "float16", 160, 4, "3.0 GB"),
25+
("Whisper small", "whisper-small", "float32", 175, 12, "1.8 GB"),
26+
("Whisper small", "whisper-small", "bfloat16", 185, 13, "1.4 GB"),
27+
("Whisper small", "whisper-small", "float16", 190, 13, "1.3 GB"),
28+
("Whisper small.en", "whisper-small.en", "float32", 180, 14, "1.5 GB"),
29+
("Whisper small.en", "whisper-small.en", "bfloat16", 190, 15, "1.2 GB"),
30+
("Whisper small.en", "whisper-small.en", "float16", 195, 15, "1.1 GB"),
31+
("Whisper base", "whisper-base", "float32", 225, 20, "1.1 GB"),
32+
("Whisper base", "whisper-base", "bfloat16", 235, 21, "0.9 GB"),
33+
("Whisper base", "whisper-base", "float16", 240, 21, "0.85 GB"),
34+
("Whisper base.en", "whisper-base.en", "float32", 230, 22, "1.0 GB"),
35+
("Whisper base.en", "whisper-base.en", "bfloat16", 240, 23, "0.85 GB"),
36+
("Whisper base.en", "whisper-base.en", "float16", 245, 23, "0.8 GB"),
37+
("Whisper tiny", "whisper-tiny", "float32", 275, 28, "0.75 GB"),
38+
("Whisper tiny", "whisper-tiny", "bfloat16", 285, 29, "0.65 GB"),
39+
("Whisper tiny", "whisper-tiny", "float16", 290, 29, "0.6 GB"),
40+
("Whisper tiny.en", "whisper-tiny.en", "float32", 280, 30, "0.7 GB"),
41+
("Whisper tiny.en", "whisper-tiny.en", "bfloat16", 290, 31, "0.6 GB"),
42+
("Whisper tiny.en", "whisper-tiny.en", "float16", 295, 31, "0.55 GB"),
43+
]
244

345
WHISPER_MODELS = {
4-
# LARGE-V3
5-
'Distil Whisper large-v3 - float32': {
6-
'name': 'Distil Whisper large-v3',
7-
'precision': 'float32',
8-
'repo_id': 'ctranslate2-4you/distil-whisper-large-v3-ct2-float32',
9-
'tokens_per_second': 160,
10-
'optimal_batch_size': 4,
11-
'avg_vram_usage': '3.0 GB'
12-
},
13-
'Distil Whisper large-v3 - bfloat16': {
14-
'name': 'Distil Whisper large-v3',
15-
'precision': 'bfloat16',
16-
'repo_id': 'ctranslate2-4you/distil-whisper-large-v3-ct2-bfloat16',
17-
'tokens_per_second': 160,
18-
'optimal_batch_size': 4,
19-
'avg_vram_usage': '3.0 GB'
20-
},
21-
'Distil Whisper large-v3 - float16': {
22-
'name': 'Distil Whisper large-v3',
23-
'precision': 'float16',
24-
'repo_id': 'ctranslate2-4you/distil-whisper-large-v3-ct2-float16',
25-
'tokens_per_second': 160,
26-
'optimal_batch_size': 4,
27-
'avg_vram_usage': '3.0 GB'
28-
},
29-
'Whisper large-v3 - float32': {
30-
'name': 'Whisper large-v3',
31-
'precision': 'float32',
32-
'repo_id': 'ctranslate2-4you/whisper-large-v3-ct2-float32',
33-
'tokens_per_second': 85,
34-
'optimal_batch_size': 2,
35-
'avg_vram_usage': '5.5 GB'
36-
},
37-
'Whisper large-v3 - bfloat16': {
38-
'name': 'Whisper large-v3',
39-
'precision': 'bfloat16',
40-
'repo_id': 'ctranslate2-4you/whisper-large-v3-ct2-bfloat16',
41-
'tokens_per_second': 95,
42-
'optimal_batch_size': 3,
43-
'avg_vram_usage': '3.8 GB'
44-
},
45-
'Whisper large-v3 - float16': {
46-
'name': 'Whisper large-v3',
47-
'precision': 'float16',
48-
'repo_id': 'ctranslate2-4you/whisper-large-v3-ct2-float16',
49-
'tokens_per_second': 100,
50-
'optimal_batch_size': 3,
51-
'avg_vram_usage': '3.3 GB'
52-
},
53-
# MEDIUM.EN
54-
'Distil Whisper medium.en - float32': {
55-
'name': 'Distil Whisper large-v3',
56-
'precision': 'float32',
57-
'repo_id': 'ctranslate2-4you/distil-whisper-medium.en-ct2-float32',
58-
'tokens_per_second': 160,
59-
'optimal_batch_size': 4,
60-
'avg_vram_usage': '3.0 GB'
61-
},
62-
'Distil Whisper medium.en - bfloat16': {
63-
'name': 'Distil Whisper medium.en',
64-
'precision': 'bfloat16',
65-
'repo_id': 'ctranslate2-4you/distil-whisper-medium.en-ct2-bfloat16',
66-
'tokens_per_second': 160,
67-
'optimal_batch_size': 4,
68-
'avg_vram_usage': '3.0 GB'
69-
},
70-
'Distil Whisper medium.en - float16': {
71-
'name': 'Distil Whisper medium.en',
72-
'precision': 'float16',
73-
'repo_id': 'ctranslate2-4you/distil-whisper-medium.en-ct2-float16',
74-
'tokens_per_second': 160,
75-
'optimal_batch_size': 4,
76-
'avg_vram_usage': '3.0 GB'
77-
},
78-
'Whisper medium.en - float32': {
79-
'name': 'Whisper medium.en',
80-
'precision': 'float32',
81-
'repo_id': 'ctranslate2-4you/whisper-medium.en-ct2-float32',
82-
'tokens_per_second': 130,
83-
'optimal_batch_size': 6,
84-
'avg_vram_usage': '2.5 GB'
85-
},
86-
'Whisper medium.en - bfloat16': {
87-
'name': 'Whisper medium.en',
88-
'precision': 'bfloat16',
89-
'repo_id': 'ctranslate2-4you/whisper-medium.en-ct2-bfloat16',
90-
'tokens_per_second': 140,
91-
'optimal_batch_size': 7,
92-
'avg_vram_usage': '2.0 GB'
93-
},
94-
'Whisper medium.en - float16': {
95-
'name': 'Whisper medium.en',
96-
'precision': 'float16',
97-
'repo_id': 'ctranslate2-4you/whisper-medium.en-ct2-float16',
98-
'tokens_per_second': 145,
99-
'optimal_batch_size': 7,
100-
'avg_vram_usage': '1.8 GB'
101-
},
102-
# SMALL.EN
103-
'Distil Whisper small.en - float32': {
104-
'name': 'Distil Whisper small.en',
105-
'precision': 'float32',
106-
'repo_id': 'ctranslate2-4you/distil-whisper-small.en-ct2-float32',
107-
'tokens_per_second': 160,
108-
'optimal_batch_size': 4,
109-
'avg_vram_usage': '3.0 GB'
110-
},
111-
'Distil Whisper small.en - bfloat16': {
112-
'name': 'Distil Whisper small.en',
113-
'precision': 'bfloat16',
114-
'repo_id': 'ctranslate2-4you/distil-whisper-small.en-ct2-bfloat16',
115-
'tokens_per_second': 160,
116-
'optimal_batch_size': 4,
117-
'avg_vram_usage': '3.0 GB'
118-
},
119-
'Distil Whisper small.en - float16': {
120-
'name': 'Distil Whisper small.en',
121-
'precision': 'float16',
122-
'repo_id': 'ctranslate2-4you/distil-whisper-small.en-ct2-float16',
123-
'tokens_per_second': 160,
124-
'optimal_batch_size': 4,
125-
'avg_vram_usage': '3.0 GB'
126-
},
127-
'Whisper small.en - float32': {
128-
'name': 'Whisper small.en',
129-
'precision': 'float32',
130-
'repo_id': 'ctranslate2-4you/whisper-small.en-ct2-float32',
131-
'tokens_per_second': 180,
132-
'optimal_batch_size': 14,
133-
'avg_vram_usage': '1.5 GB'
134-
},
135-
'Whisper small.en - bfloat16': {
136-
'name': 'Whisper small.en',
137-
'precision': 'bfloat16',
138-
'repo_id': 'ctranslate2-4you/whisper-small.en-ct2-bfloat16',
139-
'tokens_per_second': 190,
140-
'optimal_batch_size': 15,
141-
'avg_vram_usage': '1.2 GB'
142-
},
143-
'Whisper small.en - float16': {
144-
'name': 'Whisper small.en',
145-
'precision': 'float16',
146-
'repo_id': 'ctranslate2-4you/whisper-small.en-ct2-float16',
147-
'tokens_per_second': 195,
148-
'optimal_batch_size': 15,
149-
'avg_vram_usage': '1.1 GB'
150-
},
151-
# BASE.EN
152-
'Whisper base.en - float32': {
153-
'name': 'Whisper base.en',
154-
'precision': 'float32',
155-
'repo_id': 'ctranslate2-4you/whisper-base.en-ct2-float32',
156-
'tokens_per_second': 230,
157-
'optimal_batch_size': 22,
158-
'avg_vram_usage': '1.0 GB'
159-
},
160-
'Whisper base.en - bfloat16': {
161-
'name': 'Whisper base.en',
162-
'precision': 'bfloat16',
163-
'repo_id': 'ctranslate2-4you/whisper-base.en-ct2-bfloat16',
164-
'tokens_per_second': 240,
165-
'optimal_batch_size': 23,
166-
'avg_vram_usage': '0.85 GB'
167-
},
168-
'Whisper base.en - float16': {
169-
'name': 'Whisper base.en',
170-
'precision': 'float16',
171-
'repo_id': 'ctranslate2-4you/whisper-base.en-ct2-float16',
172-
'tokens_per_second': 245,
173-
'optimal_batch_size': 23,
174-
'avg_vram_usage': '0.8 GB'
175-
},
176-
# TINY.EN
177-
'Whisper tiny.en - float32': {
178-
'name': 'Whisper tiny.en',
179-
'precision': 'float32',
180-
'repo_id': 'ctranslate2-4you/whisper-tiny.en-ct2-float32',
181-
'tokens_per_second': 280,
182-
'optimal_batch_size': 30,
183-
'avg_vram_usage': '0.7 GB'
184-
},
185-
'Whisper tiny.en - bfloat16': {
186-
'name': 'Whisper tiny.en',
187-
'precision': 'bfloat16',
188-
'repo_id': 'ctranslate2-4you/whisper-tiny.en-ct2-bfloat16',
189-
'tokens_per_second': 290,
190-
'optimal_batch_size': 31,
191-
'avg_vram_usage': '0.6 GB'
192-
},
193-
'Whisper tiny.en - float16': {
194-
'name': 'Whisper tiny.en',
195-
'precision': 'float16',
196-
'repo_id': 'ctranslate2-4you/whisper-tiny.en-ct2-float16',
197-
'tokens_per_second': 295,
198-
'optimal_batch_size': 31,
199-
'avg_vram_usage': '0.55 GB'
200-
},
46+
f"{name} - {prec}": {
47+
'name': name,
48+
'precision': prec,
49+
'repo_id': f'ctranslate2-4you/{slug}-ct2-{prec}',
50+
'tokens_per_second': tps,
51+
'optimal_batch_size': batch,
52+
'avg_vram_usage': vram,
53+
}
54+
for name, slug, prec, tps, batch, vram in _MODEL_SPECS
20155
}
20256

57+
MODEL_NAMES = list(OrderedDict.fromkeys(name for name, *_ in _MODEL_SPECS))
58+
59+
MODEL_PRECISIONS = {}
60+
for name, slug, prec, *_ in _MODEL_SPECS:
61+
MODEL_PRECISIONS.setdefault(name, []).append(prec)
62+
63+
DISTIL_MODELS = frozenset(name for name, *_ in _MODEL_SPECS if name.startswith("Distil"))
64+
65+
WHISPER_LANGUAGES = OrderedDict([
66+
("af", "Afrikaans"), ("am", "Amharic"), ("ar", "Arabic"), ("as", "Assamese"),
67+
("az", "Azerbaijani"), ("ba", "Bashkir"), ("be", "Belarusian"), ("bg", "Bulgarian"),
68+
("bn", "Bengali"), ("bo", "Tibetan"), ("br", "Breton"), ("bs", "Bosnian"),
69+
("ca", "Catalan"), ("cs", "Czech"), ("cy", "Welsh"), ("da", "Danish"),
70+
("de", "German"), ("el", "Greek"), ("en", "English"), ("es", "Spanish"),
71+
("et", "Estonian"), ("eu", "Basque"), ("fa", "Persian"), ("fi", "Finnish"),
72+
("fo", "Faroese"), ("fr", "French"), ("gl", "Galician"), ("gu", "Gujarati"),
73+
("ha", "Hausa"), ("haw", "Hawaiian"), ("he", "Hebrew"), ("hi", "Hindi"),
74+
("hr", "Croatian"), ("ht", "Haitian Creole"), ("hu", "Hungarian"), ("hy", "Armenian"),
75+
("id", "Indonesian"), ("is", "Icelandic"), ("it", "Italian"), ("ja", "Japanese"),
76+
("jw", "Javanese"), ("ka", "Georgian"), ("kk", "Kazakh"), ("km", "Khmer"),
77+
("kn", "Kannada"), ("ko", "Korean"), ("la", "Latin"), ("lb", "Luxembourgish"),
78+
("ln", "Lingala"), ("lo", "Lao"), ("lt", "Lithuanian"), ("lv", "Latvian"),
79+
("mg", "Malagasy"), ("mi", "Maori"), ("mk", "Macedonian"), ("ml", "Malayalam"),
80+
("mn", "Mongolian"), ("mr", "Marathi"), ("ms", "Malay"), ("mt", "Maltese"),
81+
("my", "Myanmar"), ("ne", "Nepali"), ("nl", "Dutch"), ("nn", "Nynorsk"),
82+
("no", "Norwegian"), ("oc", "Occitan"), ("pa", "Punjabi"), ("pl", "Polish"),
83+
("ps", "Pashto"), ("pt", "Portuguese"), ("ro", "Romanian"), ("ru", "Russian"),
84+
("sa", "Sanskrit"), ("sd", "Sindhi"), ("si", "Sinhala"), ("sk", "Slovak"),
85+
("sl", "Slovenian"), ("sn", "Shona"), ("so", "Somali"), ("sq", "Albanian"),
86+
("sr", "Serbian"), ("su", "Sundanese"), ("sv", "Swedish"), ("sw", "Swahili"),
87+
("ta", "Tamil"), ("te", "Telugu"), ("tg", "Tajik"), ("th", "Thai"),
88+
("tk", "Turkmen"), ("tl", "Tagalog"), ("tr", "Turkish"), ("tt", "Tatar"),
89+
("uk", "Ukrainian"), ("ur", "Urdu"), ("uz", "Uzbek"), ("vi", "Vietnamese"),
90+
("yi", "Yiddish"), ("yo", "Yoruba"), ("zh", "Chinese"),
91+
])
20392

204-
# File extensions
20593
SUPPORTED_AUDIO_EXTENSIONS = [
206-
".aac", ".amr", ".asf", ".avi", ".flac", ".m4a",
94+
".aac", ".amr", ".asf", ".avi", ".flac", ".m4a",
20795
".mkv", ".mp3", ".mp4", ".wav", ".webm", ".wma"
20896
]
20997

210-
# Output formats
21198
OUTPUT_FORMATS = ["txt", "vtt", "srt", "tsv", "json"]
212-
213-
# Task modes
21499
TASK_MODES = ["transcribe", "translate"]
215100

216-
# Default settings
217101
DEFAULT_BEAM_SIZE = 1
218102
DEFAULT_BATCH_SIZE = 8
219103
DEFAULT_OUTPUT_FORMAT = "txt"
220-
DEFAULT_TASK_MODE = "transcribe"
104+
DEFAULT_TASK_MODE = "transcribe"
105+
DEFAULT_LANGUAGE = "en"

config/settings.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,25 @@
1-
"""Application settings management."""
21
from dataclasses import dataclass
3-
from typing import List, Optional
2+
from typing import List
43

54
@dataclass
65
class TranscriptionSettings:
7-
"""Settings for transcription processing."""
86
model_key: str
97
device: str
108
beam_size: int
119
batch_size: int
1210
output_format: str
1311
task_mode: str
12+
language: str
1413
recursive: bool
1514
selected_extensions: List[str]
16-
15+
1716
def validate(self) -> List[str]:
18-
"""Validate settings and return list of warnings."""
1917
warnings = []
20-
18+
2119
if self.device.lower() == "cpu" and self.batch_size > 8:
2220
warnings.append(
2321
"CPU batch size > 8 may reduce performance. "
2422
"Consider reducing batch size for better results."
2523
)
26-
27-
return warnings
24+
25+
return warnings

core/models/manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import whisper_s2t
88

99
from config.constants import WHISPER_MODELS
10-
from utils.system_utils import get_logical_core_count
10+
from utils.system_utils import get_optimal_cpu_threads
1111

1212
class ModelManager(QObject):
1313
model_loaded = Signal(str, str)
@@ -18,7 +18,7 @@ def __init__(self):
1818
self._current_model = None
1919
self._current_config = None
2020
self._model_mutex = QMutex()
21-
self._cpu_threads = max(4, get_logical_core_count() - 8)
21+
self._cpu_threads = get_optimal_cpu_threads()
2222

2323
def get_or_load_model(self, model_key: str, device: str,
2424
beam_size: int, precision: str) -> Optional[Any]:

core/monitoring/metrics_store.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import threading
2-
from typing import List, Optional
3-
41
from PySide6.QtCore import QObject, Signal
52

63
from core.monitoring.system_metrics import SystemMetrics
@@ -10,23 +7,5 @@ class MetricsStore(QObject):
107

118
metrics_ready = Signal(object)
129

13-
def __init__(self, buffer_size: int = 100, parent=None):
14-
super().__init__(parent)
15-
self.buffer_size = buffer_size
16-
self.metrics_history: List[SystemMetrics] = []
17-
self._lock = threading.Lock()
18-
1910
def add_metrics(self, metrics: SystemMetrics) -> None:
20-
with self._lock:
21-
self.metrics_history.append(metrics)
22-
if len(self.metrics_history) > self.buffer_size:
23-
self.metrics_history.pop(0)
2411
self.metrics_ready.emit(metrics)
25-
26-
def get_latest_metrics(self) -> Optional[SystemMetrics]:
27-
with self._lock:
28-
return self.metrics_history[-1] if self.metrics_history else None
29-
30-
def clear(self) -> None:
31-
with self._lock:
32-
self.metrics_history.clear()

0 commit comments

Comments
 (0)