Skip to content

Commit 7ad55cf

Browse files
authored
improvements to wav2aug (#5)
* align w speechbrain
* refactor chunk_swap, freq_drop, time_drop
* allow for same augmentation selection
* single augment test
* fix circ
* add noise workers
* update clip, freq_drop, and noise
* Revert "update clip, freq_drop, and noise" — this reverts commit 362cb47.
* Reapply "update clip, freq_drop, and noise" — this reverts commit 00cceea.
* fix torchaudio version #3 (#4)
* added preload for add_noise
* improve speed_pert and chunk_swap
* clean up
* fixed speed_pert bad gcd issue
* formatting
* fixed tests
* update readme
1 parent e9b7053 commit 7ad55cf

15 files changed

+710
-296
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ dependencies = [
3030
"torch>=2.0.0",
3131
"torchaudio>=2.0.0",
3232
"torchcodec>=0.7.0",
33+
"tqdm>=4.0.0",
3334
]
3435

3536
[project.optional-dependencies]

readme.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ A minimalistic PyTorch-based audio augmentation library for speech and audio aug
66

77
## ⚙️ Features
88

9-
* **Minimal dependencies**: we only rely on PyTorch, torchcodec, and torchaudio.
109
* **9 core augmentations**: amplitude scaling/clipping, noise addition, frequency dropout, polarity inversion, chunk swapping, speed perturbation, time dropout, and babble noise.
1110
* **Simplicity**: just install and start augmenting!
1211
* **Randomness**: all stochastic ops use PyTorch RNGs. Set a single seed and be done, e.g. torch.manual_seed(0); torch.cuda.manual_seed_all(0)

tests/test_gpu_augmentations.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def test_chunk_swap_outputs_permutation():
4949
)
5050
reference = base.clone()
5151
out = chunk_swap(base)
52-
assert out.data_ptr() == base.data_ptr()
52+
assert out.shape == base.shape
5353
assert torch.allclose(
5454
torch.sort(out, dim=1).values, torch.sort(reference, dim=1).values
5555
)
@@ -74,7 +74,7 @@ def _stub_noise_like(ref, sample_rate, noise_dir):
7474
ptr = waveforms.data_ptr()
7575
out = add_noise(
7676
waveforms,
77-
sample_rate=16_000,
77+
16_000, # sample_rate as positional argument
7878
snr_low=0.0,
7979
snr_high=0.0,
8080
download=False,
@@ -103,8 +103,9 @@ def test_speed_perturb_adjusts_length():
103103
waveforms = torch.linspace(
104104
0, 1, steps=200, device=DEVICE, dtype=torch.float32
105105
).repeat(2, 1)
106-
out = speed_perturb(waveforms, 16000, speed_changes=(0.5,))
107-
expected_len = int(round(200 * 1 / 0.5))
106+
out = speed_perturb(waveforms, 16000, speeds=(50,))
107+
# speed=50% → ratio=2.0 → 2x samples (slower)
108+
expected_len = int(200 * 2.0)
108109
assert out.shape == (2, expected_len)
109110

110111

uv.lock

Lines changed: 93 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

wav2aug/data/fetch.py

Lines changed: 13 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
import logging
66
import os
77
import pathlib
8-
import shutil
9-
import sys
108
import tarfile
119
import tempfile
1210
import time
1311
import urllib.request
1412
from pathlib import Path
1513
from urllib.parse import urlparse
1614

15+
from tqdm import tqdm
16+
1717
try:
1818
import fcntl
1919

@@ -59,51 +59,25 @@ def _safe_extract_tar_gz(tgz_path: str, dest_dir: str) -> None:
5959

6060
def _download(url: str, out_path: str) -> None:
6161
"""Download with simple progress bar to stderr."""
62-
show = os.environ.get("WAV2AUG_PROGRESS", "1") != "0"
63-
64-
def _progress(done: int, total: int):
65-
w = max(10, min(40, shutil.get_terminal_size(fallback=(80, 20)).columns - 30))
66-
if total > 0:
67-
pct = done / total
68-
fill = int(pct * w)
69-
bar = "#" * fill + "." * (w - fill)
70-
sys.stderr.write(
71-
f"\rwav2aug - Progress [{bar}] {pct*100:5.1f}% {done/1e6:6.1f}MB/{total/1e6:6.1f}MB"
72-
)
73-
else:
74-
sys.stderr.write(f"\rwav2aug - Progress {done/1e6:6.1f}MB")
75-
sys.stderr.flush()
76-
7762
name = Path(urlparse(url).path).name or "download"
7863

79-
sys.stderr.write(f"wav2aug - Downloading: {name}\n")
80-
sys.stderr.flush()
81-
8264
req = urllib.request.Request(url, headers={"User-Agent": "wav2aug/1.0"})
8365
start = time.monotonic()
8466
with urllib.request.urlopen(req) as r, open(out_path, "wb") as f:
8567
total = int(r.headers.get("Content-Length") or 0)
8668
chunk = 1 << 20
8769
done = 0
88-
last = time.monotonic()
89-
tty = show and sys.stderr.isatty()
90-
if tty:
91-
_progress(0, total)
92-
93-
while True:
94-
buf = r.read(chunk)
95-
if not buf:
96-
break
97-
f.write(buf)
98-
done += len(buf)
99-
if tty and (time.monotonic() - last) >= 0.05:
100-
_progress(done, total)
101-
last = time.monotonic()
102-
103-
if tty:
104-
_progress(done, total)
105-
sys.stderr.write("\n")
106-
sys.stderr.flush()
70+
71+
with tqdm(
72+
total=total, desc=f"Downloading {name}", unit="B", unit_scale=True
73+
) as pbar:
74+
while True:
75+
buf = r.read(chunk)
76+
if not buf:
77+
break
78+
f.write(buf)
79+
done += len(buf)
80+
pbar.update(len(buf))
10781

10882
elapsed = max(1e-6, time.monotonic() - start)
10983
log.info(

wav2aug/gpu/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from .amplitude_scaling import rand_amp_scale
33
from .chunk_swapping import chunk_swap
44
from .frequency_dropout import freq_drop
5-
from .noise_addition import add_babble_noise, add_noise
5+
from .noise_addition import NoiseLoader, add_babble_noise, add_noise
66
from .polarity_inversion import invert_polarity
77
from .speed_perturbation import speed_perturb
88
from .time_dropout import time_dropout
@@ -15,6 +15,7 @@
1515
"freq_drop",
1616
"add_noise",
1717
"add_babble_noise",
18+
"NoiseLoader",
1819
"invert_polarity",
1920
"speed_perturb",
2021
"time_dropout",

wav2aug/gpu/amplitude_clipping.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ def rand_amp_clip(
1313
) -> torch.Tensor:
1414
"""Random amplitude clipping for batched waveforms.
1515
16+
Normalizes each waveform to [-1, 1], applies clipping, then restores
17+
the original amplitude scaled by the clip factor.
18+
1619
Args:
1720
waveforms: Tensor of shape [batch, time].
1821
clip_low: Minimum clipping threshold as a fraction of peak.
@@ -30,19 +33,22 @@ def rand_amp_clip(
3033

3134
device = waveforms.device
3235
dtype = waveforms.dtype
33-
peaks = waveforms.abs().amax(dim=1, keepdim=True).clamp_min(1.0)
34-
normalized = waveforms / peaks
3536

36-
# Per-sample clip thresholds
37-
clip = torch.rand((waveforms.size(0), 1), device=device, dtype=dtype)
37+
# Normalize to [-1, 1] by absolute max
38+
abs_max = waveforms.abs().amax(dim=1, keepdim=True)
39+
abs_max = abs_max.clamp_min(eps)
40+
waveforms.div_(abs_max)
41+
42+
# Single clip value for entire batch (matches SpeechBrain)
43+
clip = torch.rand(1, device=device, dtype=dtype)
3844
clip = clip * (clip_high - clip_low) + clip_low
3945
clip = clip.clamp_min(eps)
4046

41-
normalized = torch.minimum(normalized, clip)
42-
normalized = torch.maximum(normalized, -clip)
47+
# Apply clipping
48+
waveforms.clamp_(-clip, clip)
4349

44-
scale = peaks / clip
45-
waveforms.copy_(normalized * scale)
50+
# Restore amplitude scaled by clip factor
51+
waveforms.mul_(abs_max / clip)
4652
return waveforms
4753

4854

wav2aug/gpu/amplitude_scaling.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ def rand_amp_scale(
1212
) -> torch.Tensor:
1313
"""Random amplitude scaling for batched waveforms.
1414
15+
Normalizes each waveform to [-1, 1] then applies a random amplitude
16+
scale factor.
17+
1518
Args:
1619
waveforms: Tensor of shape [batch, time].
1720
amp_low: Minimum amplitude scale factor.
@@ -28,12 +31,17 @@ def rand_amp_scale(
2831

2932
device = waveforms.device
3033
dtype = waveforms.dtype
31-
denom = waveforms.abs().amax(dim=1, keepdim=True).clamp_min(1.0)
34+
35+
# Normalize to [-1, 1] by dividing by absolute max
36+
abs_max = waveforms.abs().amax(dim=1, keepdim=True)
37+
# Avoid division by zero for silent signals
38+
abs_max = abs_max.clamp_min(1e-14)
39+
waveforms.div_(abs_max)
3240

3341
# Per-sample scaling factors
3442
scales = torch.rand((waveforms.size(0), 1), device=device, dtype=dtype)
3543
scales = scales * (amp_high - amp_low) + amp_low
36-
waveforms.mul_(scales / denom)
44+
waveforms.mul_(scales)
3745
return waveforms
3846

3947

0 commit comments

Comments (0)