Skip to content

Commit 8b72232

Browse files
committed
Enhance MLX batch_size functionality and documentation
- Improve MLX transcribe_audio call to include more parameters (task, word_timestamps, verbose, temperature)
- Add comprehensive batch_size documentation in README with usage examples and guidelines
- Add batch_size functionality tests including parsing validation
- Ensure batch_size parameter is properly passed through to lightning-whisper-mlx backend
- Maintain backward compatibility with existing functionality

This ensures the batch_size functionality works exactly like in the source lightning-whisper-mlx repository.
1 parent 5577b7d commit 8b72232

File tree

3 files changed

+73
-2
lines changed

3 files changed

+73
-2
lines changed

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ Over 700+⭐'s because this app just works! Works great for windows
1515

1616
### New in 3.1!
1717

18-
Mac acceleration option using the new [lightning-whisper-mlx](https://github.com/mustafaaljadery/lightning-whisper-mlx) backend. Enable with `--device mlx`. Now supports multiple languages, custom vocabulary via `--initial_prompt`, and both transcribe/translate tasks. 10x faster than Whisper CPP, 4x faster than previous MLX implementations!
18+
Mac acceleration option using the new [lightning-whisper-mlx](https://github.com/mustafaaljadery/lightning-whisper-mlx) backend. Enable with `--device mlx`. Now supports multiple languages, custom vocabulary via `--initial_prompt`, both transcribe/translate tasks, and **batch processing** for improved throughput. 10x faster than Whisper CPP, 4x faster than previous MLX implementations!
19+
20+
**Batch Processing:** MLX backend supports `--batch_size` parameter (default: 12) for improved throughput. Higher batch sizes provide better performance but require more memory.
1921

2022
**Model Storage:** MLX models are now stored in `~/.cache/whisper/mlx_models/` for consistency with other backends, instead of cluttering your current working directory.
2123

@@ -55,6 +57,8 @@ transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device insane
5557
transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device insane --task translate
5658
# Mac accelerated back-end
5759
transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device mlx
60+
# Mac accelerated with custom batch size for better throughput
61+
transcribe-anything video.mp4 --device mlx --batch_size 24
5862
# Use custom prompt for better recognition of specific terms
5963
transcribe-anything video.mp4 --initial_prompt "The speaker discusses AI, machine learning, and neural networks."
6064
# Load prompt from file
@@ -174,6 +178,27 @@ Mac:
174178

175179
- Use `--device mlx`
176180

181+
#### MLX Batch Processing
182+
183+
The MLX backend supports batch processing for improved throughput on Apple Silicon:
184+
185+
```bash
186+
# Default batch size (12)
187+
transcribe-anything video.mp4 --device mlx
188+
189+
# Custom batch size for better performance
190+
transcribe-anything video.mp4 --device mlx --batch_size 24
191+
192+
# Lower batch size for memory-constrained systems
193+
transcribe-anything video.mp4 --device mlx --batch_size 6
194+
```
195+
196+
**Batch Size Guidelines:**
197+
- Default: 12 (good balance of speed and memory usage)
198+
- Higher values (16-24): Better throughput but require more unified memory
199+
- Lower values (4-8): Use if experiencing memory issues
200+
- The optimal batch size depends on your model size and available unified memory
201+
177202
# Custom Prompts and Vocabulary
178203

179204
Whisper supports custom prompts to improve transcription accuracy for domain-specific vocabulary, names, or technical terms. This is especially useful when transcribing content with:

src/transcribe_anything/whisper_mac.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ def run_whisper_mac_mlx( # pylint: disable=too-many-arguments
233233
word_timestamps = parsed_args.get("word_timestamps", False)
234234
verbose = parsed_args.get("verbose", False)
235235
temperature = parsed_args.get("temperature", 0.0)
236+
task_param = parsed_args.get("task", task) # Use parsed task or fallback to function parameter
236237

237238
# Get the environment and run transcription
238239
env = get_environment()
@@ -291,8 +292,12 @@ def run_whisper_mac_mlx( # pylint: disable=too-many-arguments
291292
audio="{input_wav_abs}",
292293
path_or_hf_repo=str(model_dir),
293294
language={repr(parsed_args.get("language"))},
295+
task={repr(task_param)},
294296
batch_size={batch_size},
295-
initial_prompt={repr(initial_prompt)}
297+
initial_prompt={repr(initial_prompt)},
298+
word_timestamps={repr(word_timestamps)},
299+
verbose={repr(verbose)},
300+
temperature={repr(temperature)}
296301
)
297302
298303
# Print the result as JSON

tests/test_insanely_fast_whisper_mlx.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,47 @@ def test_multilingual_support(self) -> None:
101101
self.assertTrue((test_dir / "out.srt").exists())
102102
self.assertTrue((test_dir / "out.json").exists())
103103

104+
@unittest.skipUnless(CAN_RUN_TEST, "Not mac")
105+
def test_batch_size_functionality(self) -> None:
106+
"""Check that batch_size parameter works correctly."""
107+
test_dir = LOCALFILE_DIR / "text_video_batch_size"
108+
shutil.rmtree(test_dir, ignore_errors=True)
109+
110+
# Test with custom batch_size
111+
run_whisper_mac_mlx(
112+
input_wav=TEST_WAV,
113+
model="small",
114+
output_dir=test_dir,
115+
language="en",
116+
task="transcribe",
117+
other_args=["--batch_size", "6"] # Custom batch size
118+
)
119+
120+
# Verify output files were created
121+
self.assertTrue((test_dir / "out.txt").exists())
122+
self.assertTrue((test_dir / "out.srt").exists())
123+
self.assertTrue((test_dir / "out.json").exists())
124+
self.assertTrue((test_dir / "out.vtt").exists())
125+
126+
@unittest.skipUnless(CAN_RUN_TEST, "Not mac")
127+
def test_batch_size_parsing(self) -> None:
128+
"""Check that batch_size argument parsing works correctly."""
129+
from transcribe_anything.whisper_mac import _parse_other_args
130+
131+
# Test valid batch_size
132+
result = _parse_other_args(["--batch_size", "24"])
133+
self.assertEqual(result["batch_size"], 24)
134+
135+
# Test with other arguments
136+
result = _parse_other_args(["--language", "en", "--batch_size", "8", "--verbose"])
137+
self.assertEqual(result["batch_size"], 8)
138+
self.assertEqual(result["language"], "en")
139+
self.assertTrue(result["verbose"])
140+
141+
# Test invalid batch_size (should not crash, just use default)
142+
result = _parse_other_args(["--batch_size", "invalid"])
143+
self.assertNotIn("batch_size", result) # Should be filtered out due to ValueError
144+
104145

105146
if __name__ == "__main__":
106147
unittest.main()

0 commit comments

Comments (0)