aj47
diff --git a/README.md b/README.md
Lines changed: 75 additions & 1 deletion
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 0 deletions
diff --git a/src/transcribe_anything/_cmd.py b/src/transcribe_anything/_cmd.py
Lines changed: 8 additions & 2 deletions
diff --git a/src/transcribe_anything/api.py b/src/transcribe_anything/api.py
Lines changed: 23 additions & 2 deletions
@@ -79,6 +79,10 @@ transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device insane
 # Mac Apple Silicon accelerated
 transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device mlx
 
+# Groq API (fastest, requires API key)
+export GROQ_API_KEY="your_groq_api_key_here"
+transcribe-anything https://www.youtube.com/watch?v=dQw4w9WgXcQ --device groq
+
 # Advanced options (see Advanced Options section below for full details)
 transcribe-anything video.mp4 --device mlx --batch_size 16 --verbose
 transcribe-anything video.mp4 --device insane --batch-size 8 --flash True
@@ -97,18 +101,27 @@ transcribe_anything(
     device="cuda"
 )
 
+# Using Groq API for fastest transcription
+transcribe_anything(
+    url_or_file="video.mp4",
+    output_dir="output_dir",
+    device="groq",
+    groq_api_key="your_groq_api_key"  # or set GROQ_API_KEY env var
+)
+
 # Full function signature:
 def transcribe(
     url_or_file: str,
     output_dir: Optional[str] = None,
     model: Optional[str] = None,              # tiny,small,medium,large
     task: Optional[str] = None,               # transcribe or translate
     language: Optional[str] = None,           # auto detected if none, "en" for english...
-    device: Optional[str] = None,             # cuda,cpu,insane,mlx
+    device: Optional[str] = None,             # cuda,cpu,insane,mlx,groq
     embed: bool = False,                      # Produces a video.mp4 with the subtitles burned in.
     hugging_face_token: Optional[str] = None, # If you want a speaker.json
     other_args: Optional[list[str]] = None,   # Other args to be passed to the whisper backend
     initial_prompt: Optional[str] = None,     # Custom prompt for better recognition of specific terms
+    groq_api_key: Optional[str] = None,       # Groq API key for speech-to-text (or set GROQ_API_KEY env var)
 ) -> str:
 
 ```
@@ -197,12 +210,73 @@ Mac:
 
 - Use `--device mlx`
 
+# Groq API Integration
+
+For the fastest transcription speeds, you can use Groq's speech-to-text API. This requires a Groq API key but provides near-instant transcription results.
+
+## Setup
+
+1. Get a free API key from [Groq Console](https://console.groq.com/)
+2. Set your API key as an environment variable:
+
+```bash
+export GROQ_API_KEY="your_groq_api_key_here"
+```
+
+Or pass it directly:
+
+```bash
+transcribe-anything video.mp4 --device groq --groq_api_key "your_api_key"
+```
+
+## Supported Models
+
+- `whisper-large-v3` - Best accuracy, multilingual
+- `whisper-large-v3-turbo` - Faster, multilingual (default mapping for most models)
+- `distil-whisper-large-v3-en` - Fastest, English-only
+
+## Features
+
+- **Speed**: Near-instant transcription (189-250x real-time)
+- **File Size**: Automatic chunking for files larger than 90MB
+- **Languages**: Multilingual support with automatic detection
+- **Custom Prompts**: Support for domain-specific vocabulary
+- **Output Formats**: Same SRT, VTT, TXT, and JSON outputs as other backends
+- **Smart Chunking**: Large files are automatically split into chunks and reassembled
+
+## Usage Examples
+
+```bash
+# Basic Groq transcription
+transcribe-anything video.mp4 --device groq
+
+# With custom model
+transcribe-anything audio.wav --device groq --model whisper-large-v3
+
+# With custom prompt for better accuracy
+transcribe-anything meeting.mp3 --device groq --initial_prompt "This is a technical discussion about AI and machine learning"
+
+# Translate to English
+transcribe-anything foreign_audio.mp4 --device groq --task translate
+
+# Large file (will be automatically chunked)
+transcribe-anything large_podcast.mp3 --device groq --model whisper-large-v3-turbo
+```
+
+## Limitations
+
+- Requires internet connection
+- API usage limits apply (see Groq pricing)
+- Large files are automatically chunked (may have slight timing gaps between chunks)
+- Requires `ffmpeg` for audio chunking of large files
+
 # Advanced Options and Backend-Specific Arguments
 
 ## Quick Reference
 
 | Backend | Device Flag | Key Arguments | Best For |
 |---------|-------------|---------------|----------|
+| **Groq API** | `--device groq` | `--groq_api_key`, `--initial_prompt` | Fastest transcription (cloud) |
 | **MLX** | `--device mlx` | `--batch_size`, `--verbose`, `--initial_prompt` | Mac Apple Silicon |
 | **Insanely Fast** | `--device insane` | `--batch-size`, `--hf_token`, `--flash`, `--timestamp` | Windows/Linux GPU |
 | **CPU** | `--device cpu` | Standard whisper args | Universal compatibility |
 
@@ -18,6 +18,7 @@ dependencies = [
     "webvtt-py==0.4.6",
     "uv-iso-env>=1.0.43",
     "python-dotenv>=1.0.1",
+    "groq>=0.11.0",
 ]
 # VERSION
 version = "3.2.0"  # Update this manually or configure setuptools-scm for automatic versioning
 
@@ -100,12 +100,12 @@ def parse_arguments() -> argparse.Namespace:
         default=None,
         choices=[None] + whisper_options["language"],
     )
-    choices = [None, "cpu", "cuda", "insane"]
+    choices = [None, "cpu", "cuda", "insane", "groq"]
     if platform.system() == "Darwin":
         choices.extend(["mlx", "mps"])  # mps for backward compatibility
     parser.add_argument(
         "--device",
-        help="device to use for processing, None will auto select CUDA if available or else CPU",
+        help="device to use for processing, None will auto select CUDA if available or else CPU. Use 'groq' for Groq API",
         default=None,
         choices=choices,
     )
@@ -119,6 +119,11 @@ def parse_arguments() -> argparse.Namespace:
         help="save huggingface token to a file for future use",
         action="store_true",
     )
+    parser.add_argument(
+        "--groq_api_key",
+        help="Groq API key for speech-to-text (can also be set via GROQ_API_KEY environment variable)",
+        default=None,
+    )
     parser.add_argument(
         "--diarization_model",
         help=("Name of the pretrained model/ checkpoint to perform diarization." + " (default: pyannote/speaker-diarization). Only works for --device insane."),
@@ -254,6 +259,7 @@ def main() -> int:
             embed=args.embed,
             hugging_face_token=args.hf_token,
             other_args=unknown,
+            groq_api_key=args.groq_api_key,
         )
     except KeyboardInterrupt:
         print("KeyboardInterrupt")
 
@@ -23,6 +23,7 @@
 from appdirs import user_config_dir  # type: ignore
 
 from transcribe_anything.audio import fetch_audio
+from transcribe_anything.groq_whisper import run_groq_whisper
 from transcribe_anything.insanely_fast_whisper import run_insanely_fast_whisper
 from transcribe_anything.logger import log_error
 from transcribe_anything.util import chop_double_extension, sanitize_filename
@@ -53,6 +54,7 @@ class Device(Enum):
     CUDA = "cuda"
     INSANE = "insane"
     MLX = "mlx"
+    GROQ = "groq"
 
     def __str__(self) -> str:
         return self.value
@@ -73,6 +75,8 @@ def from_str(device: str) -> "Device":
             if sys.platform != "darwin":
                 raise ValueError("MLX is only supported on macOS.")
             return Device.MLX
+        if device == "groq":
+            return Device.GROQ
         # Backward compatibility: accept 'mps' as alias for 'mlx'
         if device == "mps":
             if sys.platform != "darwin":
@@ -174,6 +178,7 @@ def transcribe(
     hugging_face_token: Optional[str] = None,
     other_args: Optional[list[str]] = None,
     initial_prompt: Optional[str] = None,
+    groq_api_key: Optional[str] = None,
 ) -> str:
     """
     Runs the transcription program.
@@ -184,13 +189,14 @@ def transcribe(
         model: Whisper model to use (tiny, small, medium, large, etc.)
         task: Task to perform (transcribe or translate)
         language: Language of the audio (auto-detected if None)
-        device: Device to use (cuda, cpu, insane, mlx)
+        device: Device to use (cuda, cpu, insane, mlx, groq)
         embed: Whether to embed subtitles into video file
         hugging_face_token: Token for speaker diarization
         other_args: Additional arguments to pass to Whisper backend
         initial_prompt: Initial prompt to provide context for transcription.
                        Useful for custom vocabulary, names, or domain-specific terms.
                        Example: "The speaker discusses AI, machine learning, and neural networks."
+        groq_api_key: API key for Groq speech-to-text service (can also be set via GROQ_API_KEY env var)
 
     Returns:
         Path to the output directory containing transcription files
@@ -244,6 +250,10 @@ def transcribe(
             print("#####################################")
             print("####### MAC MLX GPU MODE! ###########")
             print("#####################################")
+        elif device_enum == Device.GROQ:
+            print("#####################################")
+            print("####### GROQ API MODE! ###############")
+            print("#####################################")
         else:
             raise ValueError(f"Unknown device {device}")
         print(f"Using device {device}")
@@ -260,7 +270,18 @@ def transcribe(
 
         print(f"Running whisper on {tmp_wav} (will install models on first run)")
         with tempfile.TemporaryDirectory() as tmpdir:
-            if device_enum == Device.INSANE:
+            if device_enum == Device.GROQ:
+                run_groq_whisper(
+                    input_wav=Path(tmp_wav),
+                    model=model_str,
+                    output_dir=Path(tmpdir),
+                    task=task_str,
+                    language=language_str,
+                    api_key=groq_api_key,
+                    initial_prompt=initial_prompt,
+                    other_args=other_args,
+                )
+            elif device_enum == Device.INSANE:
                 run_insanely_fast_whisper(
                     input_wav=Path(tmp_wav),
                     model=model_str,
Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ dependencies = [`
`18`	`18`	`"webvtt-py==0.4.6",`
`19`	`19`	`"uv-iso-env>=1.0.43",`
`20`	`20`	`"python-dotenv>=1.0.1",`
	`21`	`+ "groq>=0.11.0",`
`21`	`22`	`]`
`22`	`23`	`# VERSION`
`23`	`24`	`version = "3.2.0" # Update this manually or configure setuptools-scm for automatic versioning`