Blaizzy
diff --git a/‎CONTRIBUTIONS.md‎
Lines changed: 7 additions & 0 deletions b/‎CONTRIBUTIONS.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/denoise/noisey_audio_10s.wav‎
938 KB b/‎examples/denoise/noisey_audio_10s.wav‎
938 KB
diff --git a/‎examples/denoise/noisey_audio_10s_target.wav‎
938 KB b/‎examples/denoise/noisey_audio_10s_target.wav‎
938 KB
diff --git a/‎mlx_audio/sts/__init__.py‎
Lines changed: 20 additions & 1 deletion b/‎mlx_audio/sts/__init__.py‎
Lines changed: 20 additions & 1 deletion
diff --git a/‎mlx_audio/sts/generate.py‎
Lines changed: 146 additions & 0 deletions b/‎mlx_audio/sts/generate.py‎
Lines changed: 146 additions & 0 deletions
diff --git a/‎mlx_audio/sts/models/__init__.py‎
Lines changed: 15 additions & 0 deletions b/‎mlx_audio/sts/models/__init__.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎mlx_audio/sts/models/deepfilternet/README.md‎
Lines changed: 34 additions & 0 deletions b/‎mlx_audio/sts/models/deepfilternet/README.md‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎mlx_audio/sts/models/deepfilternet/__init__.py‎
Lines changed: 19 additions & 0 deletions b/‎mlx_audio/sts/models/deepfilternet/__init__.py‎
Lines changed: 19 additions & 0 deletions
@@ -8,3 +8,10 @@ This file acknowledges the original authors and contributors of models ported to
 - **Copyright**: Speech Lab, Alibaba Group
 - **License**: Apache License 2.0
 - **MLX Port**: Dmitry Starkov ([@starkdmi](https://github.com/starkdmi))
+
+## DeepFilterNet (Speech Enhancement)
+
+- **Original**: [Rikorose/DeepFilterNet](https://github.com/Rikorose/DeepFilterNet)
+- **Copyright**: Hendrik Schröter and contributors
+- **License**: MIT or Apache-2.0 (dual-licensed)
+- **MLX Port**: Kyle Howells ([@kylehowells](https://github.com/kylehowells))
@@ -119,8 +119,9 @@ See the [Sortformer README](mlx_audio/vad/models/sortformer/README.md) for API d
 | Model | Description | Use Case | Repo |
 |-------|-------------|----------|------|
 | **SAM-Audio** | Text-guided source separation | Extract specific sounds | [mlx-community/sam-audio-large](https://huggingface.co/mlx-community/sam-audio-large) |
-| **Liquid2.5-Audio*** | Speech-to-Speech, Text-to-Speech and Speech-to-Text | Speech interactions | [mlx-community/LFM2.5-Audio-1.5B-8bit](https://huggingface.co/mlx-community/LFM2.5-Audio-1.5B-8bit)
+| **Liquid2.5-Audio*** | Speech-to-Speech, Text-to-Speech and Speech-to-Text | Speech interactions | [mlx-community/LFM2.5-Audio-1.5B-8bit](https://huggingface.co/mlx-community/LFM2.5-Audio-1.5B-8bit) |
 | **MossFormer2 SE** | Speech enhancement | Noise removal | [starkdmi/MossFormer2_SE_48K_MLX](https://huggingface.co/starkdmi/MossFormer2_SE_48K_MLX) |
+| **DeepFilterNet (1/2/3)** | Speech enhancement | Noise suppression | [mlx-community/DeepFilterNet-mlx](https://huggingface.co/mlx-community/DeepFilterNet-mlx) |
 
 ## Model Examples
 
 
@@ -1,3 +1,11 @@
+from .models.deepfilternet import (
+    DeepFilterNet2Config,
+    DeepFilterNet3Config,
+    DeepFilterNetConfig,
+    DeepFilterNetModel,
+    DeepFilterNetStreamer,
+    DeepFilterNetStreamingConfig,
+)
 from .models.mossformer2_se import (
     MossFormer2SE,
     MossFormer2SEConfig,
@@ -11,7 +19,11 @@
     SeparationResult,
     save_audio,
 )
-from .voice_pipeline import VoicePipeline
+
+try:
+    from .voice_pipeline import VoicePipeline
+except ImportError:
+    VoicePipeline = None
 
 __all__ = [
     "SAMAudio",
@@ -21,6 +33,13 @@
     "save_audio",
     "SAMAudioConfig",
     "VoicePipeline",
+    # DeepFilterNet
+    "DeepFilterNetModel",
+    "DeepFilterNetConfig",
+    "DeepFilterNet2Config",
+    "DeepFilterNet3Config",
+    "DeepFilterNetStreamer",
+    "DeepFilterNetStreamingConfig",
     # MossFormer2 SE
     "MossFormer2SE",
     "MossFormer2SEConfig",
 
@@ -0,0 +1,146 @@
+"""Generate enhanced audio using speech-to-speech models.
+
+Usage:
+    python -m mlx_audio.sts.generate --model mlx-community/DeepFilterNet-mlx --audio noisy.wav
+    python -m mlx_audio.sts.generate --model mlx-community/DeepFilterNet-mlx --audio noisy.wav --version 2
+    python -m mlx_audio.sts.generate --model mlx-community/DeepFilterNet-mlx --audio noisy.wav --stream
+    python -m mlx_audio.sts.generate --model starkdmi/MossFormer2_SE_48K_MLX --audio noisy.wav
+"""
+
+from __future__ import annotations
+
+import argparse
+import time
+from pathlib import Path
+
+# Lowercase substring of a repo ID / path -> model family it selects.
+REPO_HINTS = {
+    "deepfilter": "deepfilternet",
+    "mossformer": "mossformer2",
+}
+
+
+def _detect_model_type(model_name: str) -> str:
+    """Infer the model family from a repo ID or local path.
+
+    The name is lowercased and tested against every key of ``REPO_HINTS`` in
+    insertion order; the value of the first key that occurs as a substring
+    is returned.
+
+    Raises:
+        ValueError: If none of the hints occurs in ``model_name``.
+    """
+    needle = model_name.lower()
+    detected = next(
+        (family for fragment, family in REPO_HINTS.items() if fragment in needle),
+        None,
+    )
+    if detected is None:
+        supported = ", ".join(REPO_HINTS)
+        raise ValueError(
+            f"Cannot detect model type from '{model_name}'. "
+            f"Supported models: {supported}"
+        )
+    return detected
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    """Parse the command-line options of the enhancement CLI.
+
+    Args:
+        argv: Argument strings to parse. When ``None`` (the default),
+            argparse falls back to ``sys.argv[1:]``, so existing callers
+            that pass nothing behave exactly as before.
+
+    Returns:
+        Namespace with the attributes ``model``, ``audio``, ``output_path``,
+        ``verbose``, ``version``, ``subfolder`` and ``stream``.
+    """
+    parser = argparse.ArgumentParser(
+        description="Enhance audio using speech-to-speech models"
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="mlx-community/DeepFilterNet-mlx",
+        help="HuggingFace repo ID or local path to the model",
+    )
+    parser.add_argument(
+        "--audio",
+        type=str,
+        required=True,
+        help="Path to the input audio file",
+    )
+    parser.add_argument(
+        "--output-path",
+        type=str,
+        default=None,
+        # The default name is built by appending "_enhanced" to the input
+        # stem, so the input's extension is kept (not forced to .wav).
+        help=(
+            "Output audio file path "
+            "(default: <input>_enhanced<ext>, keeping the input's extension)"
+        ),
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print detailed processing information",
+    )
+
+    # DeepFilterNet-specific options
+    dfn = parser.add_argument_group("DeepFilterNet options")
+    dfn.add_argument(
+        "--version",
+        type=int,
+        # None means "not given": the loader is then called without a version.
+        default=None,
+        choices=[1, 2, 3],
+        help="DeepFilterNet version (1, 2, or 3). Default: 3",
+    )
+    dfn.add_argument(
+        "--subfolder",
+        type=str,
+        default=None,
+        help="Subfolder within the model repo (e.g. v1, v2, v3)",
+    )
+    dfn.add_argument(
+        "--stream",
+        action="store_true",
+        help="Use streaming enhancement mode (DeepFilterNet v2/v3 only)",
+    )
+
+    return parser.parse_args(argv)
+
+
+def main():
+    """Run the enhancement CLI end to end.
+
+    Parses the command line, resolves the input and output paths, picks the
+    model family from the ``--model`` string, loads that model, writes the
+    enhanced audio to the output path and prints where it was saved.
+
+    Raises:
+        FileNotFoundError: If the input audio file does not exist.
+        ValueError: If no model family can be detected from ``--model``, or
+            the detected family has no handler in this function.
+    """
+    import sys  # local import: only used for the stderr warnings below
+
+    args = parse_args()
+
+    in_path = Path(args.audio).expanduser().resolve()
+    if not in_path.exists():
+        raise FileNotFoundError(f"Input audio file not found: {in_path}")
+
+    if args.output_path:
+        out_path = Path(args.output_path).expanduser().resolve()
+    else:
+        # Same directory and extension as the input, "_enhanced" added to the stem.
+        out_path = in_path.with_stem(in_path.stem + "_enhanced")
+
+    model_type = _detect_model_type(args.model)
+
+    if args.verbose:
+        print(f"Model:  {args.model}")
+        print(f"Type:   {model_type}")
+        print(f"Input:  {in_path}")
+        print(f"Output: {out_path}")
+
+    # Monotonic clock: the elapsed time cannot be skewed by system clock changes.
+    # The measured span covers model loading as well as enhancement.
+    start = time.perf_counter()
+
+    if model_type == "deepfilternet":
+        from mlx_audio.sts.models.deepfilternet import DeepFilterNetModel
+
+        load_kwargs = {"model_name_or_path": args.model}
+        # --version takes precedence; --subfolder is only used without it.
+        if args.version is not None:
+            load_kwargs["version"] = args.version
+            if args.subfolder is not None:
+                print(
+                    "Warning: --subfolder ignored because --version was given",
+                    file=sys.stderr,
+                )
+        elif args.subfolder is not None:
+            load_kwargs["subfolder"] = args.subfolder
+
+        model = DeepFilterNetModel.from_pretrained(**load_kwargs)
+
+        if args.stream:
+            model.enhance_file_streaming(str(in_path), str(out_path))
+            mode = "streaming"
+        else:
+            model.enhance_file(str(in_path), str(out_path))
+            mode = "offline"
+
+    elif model_type == "mossformer2":
+        from mlx_audio import audio_io
+        from mlx_audio.sts.models.mossformer2_se import MossFormer2SEModel
+
+        # The DeepFilterNet option group has no effect on this branch; say so
+        # instead of dropping the flags silently.
+        ignored = [
+            flag
+            for flag, given in (
+                ("--version", args.version is not None),
+                ("--subfolder", args.subfolder is not None),
+                ("--stream", args.stream),
+            )
+            if given
+        ]
+        if ignored:
+            print(
+                f"Warning: {', '.join(ignored)} ignored (DeepFilterNet-only option)",
+                file=sys.stderr,
+            )
+
+        model = MossFormer2SEModel.from_pretrained(args.model)
+        enhanced = model.enhance(str(in_path))
+        audio_io.write(str(out_path), enhanced, model.config.sample_rate)
+        mode = "offline"
+
+    else:
+        # Guards against a hint added to REPO_HINTS without a branch here,
+        # which would otherwise surface later as a NameError on `mode`.
+        raise ValueError(f"No enhancement handler for model type '{model_type}'")
+
+    elapsed = time.perf_counter() - start
+
+    if args.verbose:
+        print(f"Mode:   {mode}")
+        print(f"Time:   {elapsed:.2f}s")
+
+    print(f"Saved:  {out_path}")
+
+if __name__ == "__main__":
+    main()
@@ -1,5 +1,13 @@
 # Copyright (c) 2025 Prince Canuma and contributors (https://github.com/Blaizzy/mlx-audio)
 
+from .deepfilternet import (
+    DeepFilterNet2Config,
+    DeepFilterNet3Config,
+    DeepFilterNetConfig,
+    DeepFilterNetModel,
+    DeepFilterNetStreamer,
+    DeepFilterNetStreamingConfig,
+)
 from .lfm_audio import (
     ChatState,
     GenerationConfig,
@@ -25,6 +33,13 @@
     "Batch",
     "save_audio",
     "SAMAudioConfig",
+    # DeepFilterNet
+    "DeepFilterNetModel",
+    "DeepFilterNetConfig",
+    "DeepFilterNet2Config",
+    "DeepFilterNet3Config",
+    "DeepFilterNetStreamer",
+    "DeepFilterNetStreamingConfig",
     # MossFormer2 SE
     "MossFormer2SE",
     "MossFormer2SEConfig",
 
@@ -0,0 +1,34 @@
+# DeepFilterNet (MLX)
+
+DeepFilterNet speech enhancement in pure MLX with support for model versions 1, 2, and 3.
+
+Pretrained weights: [mlx-community/DeepFilterNet-mlx](https://huggingface.co/mlx-community/DeepFilterNet-mlx)
+
+## Quick Start
+
+```python
+from mlx_audio.sts.models.deepfilternet import DeepFilterNetModel
+
+# Load v3 (default)
+model = DeepFilterNetModel.from_pretrained()
+model.enhance_file("noisy.wav", "clean.wav")
+
+# Load a specific version
+model = DeepFilterNetModel.from_pretrained(version=2)
+
+# Or specify the subfolder directly
+model = DeepFilterNetModel.from_pretrained(subfolder="v1")
+```
+
+Streaming/chunked mode (true per-hop stateful processing for v2/v3):
+
+```python
+streamer = model.create_streamer(pad_end_frames=3, compensate_delay=True)
+out_1 = streamer.process_chunk(chunk_a)
+out_2 = streamer.process_chunk(chunk_b)
+out_tail = streamer.flush()
+```
+
+## Model Selection
+
+Model architecture is selected automatically from `config.json` (`model_version` field).
@@ -0,0 +1,19 @@
+"""DeepFilterNet speech enhancement model for MLX."""
+
+from .config import DeepFilterNet2Config, DeepFilterNet3Config, DeepFilterNetConfig
+from .model import DeepFilterNetModel
+from .streaming import DeepFilterNetStreamer, DeepFilterNetStreamingConfig
+
+# Generic aliases for the model class and its base config class.
+# NOTE(review): presumably a fixed-name convention so loaders can look up
+# `Model` / `ModelConfig` without knowing the concrete class — confirm.
+Model = DeepFilterNetModel
+ModelConfig = DeepFilterNetConfig
+
+__all__ = [
+    "DeepFilterNetModel",
+    "DeepFilterNetConfig",
+    "DeepFilterNet2Config",
+    "DeepFilterNet3Config",
+    "DeepFilterNetStreamer",
+    "DeepFilterNetStreamingConfig",
+    "Model",
+    "ModelConfig",
+]