@@ -79,6 +79,10 @@ transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device insane
7979# Mac Apple Silicon accelerated
8080transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device mlx
8181
82+ # Groq API (fastest, requires API key)
83+ export GROQ_API_KEY="your_groq_api_key_here"
84+ transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device groq
85+
8286# Advanced options (see Advanced Options section below for full details)
8387transcribe-anything video.mp4 --device mlx --batch_size 16 --verbose
8488transcribe-anything video.mp4 --device insane --batch-size 8 --flash True
@@ -97,18 +101,27 @@ transcribe_anything(
97101 device = "cuda"
98102)
99103
104+ # Using Groq API for fastest transcription
105+ transcribe_anything(
106+ url_or_file = "video.mp4",
107+ output_dir = "output_dir",
108+ device = "groq",
109+ groq_api_key = "your_groq_api_key" # or set GROQ_API_KEY env var
110+ )
111+
100112# Full function signature:
101113def transcribe (
102114 url_or_file : str ,
103115 output_dir : Optional[str ] = None ,
104116 model : Optional[str ] = None , # tiny,small,medium,large
105117 task : Optional[str ] = None , # transcribe or translate
106118 language : Optional[str ] = None , # auto detected if none, "en" for english...
107- device : Optional[str ] = None , # cuda,cpu,insane,mlx
119+ device : Optional[str ] = None , # cuda,cpu,insane,mlx,groq
108120 embed : bool = False , # Produces a video.mp4 with the subtitles burned in.
109121 hugging_face_token : Optional[str ] = None , # If you want a speaker.json
110122 other_args : Optional[list[str]] = None , # Other args to be passed to the whisper backend
111123 initial_prompt : Optional[str ] = None , # Custom prompt for better recognition of specific terms
124+ groq_api_key : Optional[str ] = None , # Groq API key for speech-to-text (or set GROQ_API_KEY env var)
112125) -> str :
113126
114127```
@@ -197,12 +210,73 @@ Mac:
197210
198211- Use ` --device mlx `
199212
213+ # Groq API Integration
214+
215+ For the fastest transcription speeds, you can use Groq's speech-to-text API. This requires a Groq API key but provides near-instant transcription results.
216+
217+ ## Setup
218+
219+ 1. Get a free API key from [Groq Console](https://console.groq.com/)
220+ 2. Set your API key as an environment variable:
221+
222+ ``` bash
223+ export GROQ_API_KEY="your_groq_api_key_here"
224+ ```
225+
226+ Or pass it directly:
227+
228+ ``` bash
229+ transcribe-anything video.mp4 --device groq --groq_api_key "your_api_key"
230+ ```
231+
232+ ## Supported Models
233+
234+ - ` whisper-large-v3 ` - Best accuracy, multilingual
235+ - ` whisper-large-v3-turbo ` - Faster, multilingual (default mapping for most models)
236+ - ` distil-whisper-large-v3-en ` - Fastest, English-only
237+
238+ ## Features
239+
240+ - ** Speed** : Near-instant transcription (189-250x real-time)
241+ - ** File Size** : Automatic chunking for files larger than 90MB
242+ - ** Languages** : Multilingual support with automatic detection
243+ - ** Custom Prompts** : Support for domain-specific vocabulary
244+ - ** Output Formats** : Same SRT, VTT, TXT, and JSON outputs as other backends
245+ - ** Smart Chunking** : Large files are automatically split into chunks and reassembled
246+
247+ ## Usage Examples
248+
249+ ``` bash
250+ # Basic Groq transcription
251+ transcribe-anything video.mp4 --device groq
252+
253+ # With custom model
254+ transcribe-anything audio.wav --device groq --model whisper-large-v3
255+
256+ # With custom prompt for better accuracy
257+ transcribe-anything meeting.mp3 --device groq --initial_prompt "This is a technical discussion about AI and machine learning"
258+
259+ # Translate to English
260+ transcribe-anything foreign_audio.mp4 --device groq --task translate
261+
262+ # Large file (will be automatically chunked)
263+ transcribe-anything large_podcast.mp3 --device groq --model whisper-large-v3-turbo
264+ ```
265+
266+ ## Limitations
267+
268+ - Requires internet connection
269+ - API usage limits apply (see Groq pricing)
270+ - Large files are automatically chunked (may have slight timing gaps between chunks)
271+ - Requires ` ffmpeg ` for audio chunking of large files
272+
200273# Advanced Options and Backend-Specific Arguments
201274
202275## Quick Reference
203276
204277| Backend | Device Flag | Key Arguments | Best For |
205278| ---------| -------------| ---------------| ----------|
279+ | ** Groq API** | ` --device groq ` | ` --groq_api_key ` , ` --initial_prompt ` | Fastest transcription (cloud) |
206280| ** MLX** | ` --device mlx ` | ` --batch_size ` , ` --verbose ` , ` --initial_prompt ` | Mac Apple Silicon |
207281| ** Insanely Fast** | ` --device insane ` | ` --batch-size ` , ` --hf_token ` , ` --flash ` , ` --timestamp ` | Windows/Linux GPU |
208282| ** CPU** | ` --device cpu ` | Standard whisper args | Universal compatibility |
0 commit comments