
Commit df197bf

feat: add new sampler, refactor diffusion, v0.0.11
1 parent ebc9021 commit df197bf

6 files changed: +207 −113 lines changed


README.md

Lines changed: 29 additions & 11 deletions
@@ -14,6 +14,25 @@ pip install audio-diffusion-pytorch
 
 ## Usage
 
+```py
+import torch
+from audio_diffusion_pytorch import AudioDiffusionModel
+
+model = AudioDiffusionModel()
+
+# Train model with audio sources [batch, channels, samples]
+x = torch.randn(2, 1, 2 ** 18)
+loss = model(x)
+loss.backward()
+
+# Sample given start noise
+noise = torch.randn(2, 1, 2 ** 18)
+sampled = model.sample(
+    noise=noise,
+    num_steps=5 # Range 1-100
+) # [2, 1, 2**18]
+```
+
+## Usage with Components
 
 ### UNet1d
 ```py
@@ -50,15 +69,15 @@ y = unet(x, t) # [2, 1, 32768], 2 samples of ~1.5 seconds of generated audio at
 
 #### Training
 ```python
-from audio_diffusion_pytorch import Diffusion, LogNormalSampler
+from audio_diffusion_pytorch import Diffusion, LogNormalDistribution
 
 diffusion = Diffusion(
     net=unet,
-    sigma_sampler=LogNormalSampler(mean=-3.0, std=1.0),
+    sigma_distribution=LogNormalDistribution(mean=-3.0, std=1.0),
     sigma_data=0.1
 )
 
-x = torch.randn(3, 1, 2 ** 16) # Batch of training audio samples
+x = torch.randn(3, 1, 2 ** 18) # Batch of training audio samples
 loss = diffusion(x)
 loss.backward() # Do this many times
 ```
@@ -69,22 +88,21 @@ from audio_diffusion_pytorch import DiffusionSampler, KerrasSchedule
 
 sampler = DiffusionSampler(
     diffusion,
-    num_steps=50, # Range 32-1000, higher for better quality
-    sigma_schedule=KerrasSchedule(
+    num_steps=5, # Range 1-100, higher for better quality but slower
+    sampler=ADPM2Sampler(rho=1),
+    sigma_schedule=KarrasSchedule(
         sigma_min=0.002,
         sigma_max=1
-    ),
-    s_tmin=0,
-    s_tmax=10,
-    s_churn=40,
-    s_noise=1.003
+    )
 )
 # Generate a sample starting from the provided noise
-y = sampler(x=torch.randn(1, 1, 2 ** 15))
+y = sampler(noise=torch.randn(1, 1, 2 ** 18))
 ```
 
 #### Inpainting
 
+Note: this example still uses the old API and needs to be updated.
+
 ```py
 from audio_diffusion_pytorch import DiffusionInpainter, KerrasSchedule
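The churn-style arguments removed from `DiffusionSampler` above now live on the new `KarrasSampler`. As a minimal sketch (not part of this commit's README), here is how it could be swapped in for `ADPM2Sampler`, reusing the parameter values from the old README snippet; the `num_steps` value is an illustrative assumption:

```py
from audio_diffusion_pytorch import DiffusionSampler, KarrasSampler, KarrasSchedule

# Stochastic sampler from https://arxiv.org/abs/2206.00364 (algorithm 2),
# taking over the s_* parameters that DiffusionSampler used to own
sampler = DiffusionSampler(
    diffusion,
    num_steps=50,  # Assumed; this sampler was previously documented with 32-1000 steps
    sampler=KarrasSampler(s_tmin=0, s_tmax=10, s_churn=40, s_noise=1.003),
    sigma_schedule=KarrasSchedule(sigma_min=0.002, sigma_max=1),
)
y = sampler(noise=torch.randn(1, 1, 2 ** 18))
```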

audio_diffusion_pytorch/__init__.py

Lines changed: 7 additions & 4 deletions
@@ -1,11 +1,14 @@
 from .diffusion import (
+    ADPM2Sampler,
     Diffusion,
     DiffusionInpainter,
     DiffusionSampler,
-    KerrasSchedule,
-    LogNormalSampler,
-    SigmaSampler,
-    SigmaSchedule,
+    Distribution,
+    KarrasSampler,
+    KarrasSchedule,
+    LogNormalDistribution,
+    Sampler,
+    Schedule,
     SpanBySpanComposer,
 )
 from .model import AudioDiffusionModel, Model1d
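For downstream code, the import renames in this hunk imply a small migration; a short sketch using only names that appear above:

```py
# Renames in v0.0.11 (old -> new):
#   SigmaSampler     -> Distribution
#   LogNormalSampler -> LogNormalDistribution
#   SigmaSchedule    -> Schedule
#   KerrasSchedule   -> KarrasSchedule (spelling fixed)
from audio_diffusion_pytorch import (
    ADPM2Sampler,  # new in this commit
    KarrasSampler,  # new in this commit
    KarrasSchedule,
    LogNormalDistribution,
)
```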

audio_diffusion_pytorch/diffusion.py

Lines changed: 144 additions & 63 deletions
@@ -1,5 +1,5 @@
 from math import sqrt
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 import torch
 import torch.nn as nn
@@ -9,15 +9,15 @@
 
 from .utils import default, exists
 
-""" Samplers and sigma schedules """
+""" Distributions """
 
 
-class SigmaSampler:
+class Distribution:
     def __call__(self, num_samples: int, device: torch.device):
         raise NotImplementedError()
 
 
-class LogNormalSampler(SigmaSampler):
+class LogNormalDistribution(Distribution):
     def __init__(self, mean: float, std: float):
         self.mean = mean
         self.std = std
@@ -29,15 +29,18 @@ def __call__(
         return normal.exp()
 
 
-class SigmaSchedule(nn.Module):
-    """Interface used by different sampling sigma schedules"""
+""" Schedules """
+
+
+class Schedule(nn.Module):
+    """Interface used by different schedules"""
 
     def forward(self, num_steps: int, device: torch.device) -> Tensor:
         raise NotImplementedError()
 
 
-class KerrasSchedule(SigmaSchedule):
-    """https://arxiv.org/abs/2206.00364 eq. (5)"""
+class KarrasSchedule(Schedule):
+    """https://arxiv.org/abs/2206.00364 equation 5"""
 
     def __init__(self, sigma_min: float, sigma_max: float, rho: float = 7.0):
         super().__init__()
@@ -57,21 +60,139 @@ def forward(self, num_steps: int, device: Any) -> Tensor:
         return sigmas
 
 
+""" Samplers """
+
+
+class Sampler(nn.Module):
+    def forward(
+        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
+    ) -> Tensor:
+        raise NotImplementedError()
+
+
+class KarrasSampler(Sampler):
+    """https://arxiv.org/abs/2206.00364 algorithm 1"""
+
+    def __init__(
+        self,
+        s_tmin: float = 0,
+        s_tmax: float = float("inf"),
+        s_churn: float = 0.0,
+        s_noise: float = 1.0,
+    ):
+        super().__init__()
+        self.s_tmin = s_tmin
+        self.s_tmax = s_tmax
+        self.s_noise = s_noise
+        self.s_churn = s_churn
+
+    def step(
+        self,
+        x: Tensor,
+        fn: Callable,
+        sigma: float,
+        sigma_next: float,
+        gamma: float,
+        clamp: bool = True,
+    ) -> Tensor:
+        """Algorithm 2 (step)"""
+        # Select temporarily increased noise level
+        sigma_hat = sigma + gamma * sigma
+        # Add noise to move from sigma to sigma_hat
+        epsilon = self.s_noise * torch.randn_like(x)
+        x_hat = x + sqrt(sigma_hat ** 2 - sigma ** 2) * epsilon
+        # Evaluate ∂x/∂sigma at sigma_hat
+        d = (x_hat - fn(x_hat, sigma=sigma_hat, clamp=clamp)) / sigma_hat
+        # Take euler step from sigma_hat to sigma_next
+        x_next = x_hat + (sigma_next - sigma_hat) * d
+        # Second order correction
+        if sigma_next != 0:
+            model_out_next = fn(x_next, sigma=sigma_next, clamp=clamp)
+            d_prime = (x_next - model_out_next) / sigma_next
+            x_next = x_hat + 0.5 * (sigma - sigma_hat) * (d + d_prime)
+        return x_next
+
+    def forward(
+        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
+    ) -> Tensor:
+        x = sigmas[0] * noise
+        # Compute gammas
+        gammas = torch.where(
+            (sigmas >= self.s_tmin) & (sigmas <= self.s_tmax),
+            min(self.s_churn / num_steps, sqrt(2) - 1),
+            0.0,
+        )
+        # Denoise to sample
+        for i in range(num_steps - 1):
+            x = self.step(
+                x, fn=fn, sigma=sigmas[i], sigma_next=sigmas[i + 1], gamma=gammas[i]  # type: ignore # noqa
+            )
+
+        return x
+
+
+class ADPM2Sampler(Sampler):
+    """https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py"""
+
+    """ https://www.desmos.com/calculator/jbxjlqd9mb """
+
+    def __init__(self, rho: float = 1.0):
+        super().__init__()
+        self.rho = rho
+
+    def step(
+        self,
+        x: Tensor,
+        fn: Callable,
+        sigma: float,
+        sigma_next: float,
+        clamp: bool = True,
+    ) -> Tensor:
+        # Sigma steps
+        r = self.rho
+        sigma_up = sqrt(sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2)
+        sigma_down = sqrt(sigma_next ** 2 - sigma_up ** 2)
+        sigma_mid = ((sigma ** (1 / r) + sigma_down ** (1 / r)) / 2) ** r
+        # Derivative at sigma (∂x/∂sigma)
+        d = (x - fn(x, sigma=sigma, clamp=clamp)) / sigma
+        # Denoise to midpoint
+        x_mid = x + d * (sigma_mid - sigma)
+        # Derivative at sigma_mid (∂x_mid/∂sigma_mid)
+        d_mid = (x_mid - fn(x_mid, sigma=sigma_mid, clamp=clamp)) / sigma_mid
+        # Denoise to next
+        x = x + d_mid * (sigma_down - sigma)
+        # Add randomness
+        x_next = x + torch.randn_like(x) * sigma_up
+        return x_next
+
+    def forward(
+        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
+    ) -> Tensor:
+        x = sigmas[0] * noise
+        # Denoise to sample
+        for i in range(num_steps - 1):
+            x = self.step(x, fn=fn, sigma=sigmas[i], sigma_next=sigmas[i + 1])  # type: ignore # noqa
+        return x
+
+
+""" Diffusion Classes """
+
+
 class Diffusion(nn.Module):
     """Elucidated Diffusion: https://arxiv.org/abs/2206.00364"""
 
     def __init__(
         self,
         net: nn.Module,
         *,
-        sigma_sampler: SigmaSampler,
+        sigma_distribution: Distribution,
         sigma_data: float, # data distribution standard deviation
     ):
         super().__init__()
 
         self.net = net
         self.sigma_data = sigma_data
-        self.sigma_sampler = sigma_sampler
+        self.sigma_distribution = sigma_distribution
 
     def c_skip(self, sigmas: Tensor) -> Tensor:
         return (self.sigma_data ** 2) / (sigmas ** 2 + self.sigma_data ** 2)
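An aside on the gamma computation in `KarrasSampler.forward` above (not part of the commit): gamma is nonzero only while sigma lies within `[s_tmin, s_tmax]`, and is capped at `sqrt(2) - 1` so that `sigma_hat <= sqrt(2) * sigma`. A standalone sketch, with values assumed from the old README defaults removed in this commit:

```py
from math import sqrt

s_churn, num_steps = 40, 50  # Assumed values
gamma = min(s_churn / num_steps, sqrt(2) - 1)  # min(0.8, 0.414...) -> 0.414...

sigma = 1.0
sigma_hat = sigma + gamma * sigma  # Temporarily increased noise level, ~1.414
```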
@@ -121,7 +242,7 @@ def forward(self, x: Tensor, noise: Tensor = None) -> Tensor:
         batch, device = x.shape[0], x.device
 
         # Sample amount of noise to add for each batch element
-        sigmas = self.sigma_sampler(num_samples=batch, device=device)
+        sigmas = self.sigma_distribution(num_samples=batch, device=device)
         sigmas_padded = rearrange(sigmas, "b -> b 1 1")
 
         # Add noise to input
@@ -145,65 +266,25 @@ def __init__(
         self,
         diffusion: Diffusion,
         *,
-        num_steps: int,
-        sigma_schedule: SigmaSchedule,
-        s_tmin: float = 0,
-        s_tmax: float = float("inf"),
-        s_churn: float = 0.0,
-        s_noise: float = 1.0,
+        sampler: Sampler,
+        sigma_schedule: Schedule,
+        num_steps: Optional[int] = None,
     ):
         super().__init__()
         self.denoise_fn = diffusion.denoise_fn
-        self.num_steps = num_steps
+        self.sampler = sampler
         self.sigma_schedule = sigma_schedule
-        self.s_tmin = s_tmin
-        self.s_tmax = s_tmax
-        self.s_noise = s_noise
-        self.s_churn = s_churn
-
-    def step(
-        self,
-        x: Tensor,
-        sigma: float,
-        sigma_next: float,
-        gamma: float,
-        clamp: bool = True,
-    ) -> Tensor:
-        """Algorithm 2 (step)"""
-        # Select temporarily increased noise level
-        sigma_hat = sigma + gamma * sigma
-        # Add noise to move from sigma to sigma_hat
-        epsilon = self.s_noise * torch.randn_like(x)
-        x_hat = x + sqrt(sigma_hat ** 2 - sigma ** 2) * epsilon
-        # Evaluate ∂x/∂sigma at sigma_hat
-        d = (x_hat - self.denoise_fn(x_hat, sigma=sigma_hat, clamp=clamp)) / sigma_hat
-        # Take euler step from sigma_hat to sigma_next
-        x_next = x_hat + (sigma_next - sigma_hat) * d
-        # Second order correction
-        if sigma_next != 0:
-            model_out_next = self.denoise_fn(x_next, sigma=sigma_next, clamp=clamp)
-            d_prime = (x_next - model_out_next) / sigma_next
-            x_next = x_hat + 0.5 * (sigma - sigma_hat) * (d + d_prime)
-        return x_next
+        self.num_steps = num_steps
 
     @torch.no_grad()
-    def forward(self, x: Tensor, num_steps: int = None) -> Tensor:
-        device = x.device
-        num_steps = default(num_steps, self.num_steps)
+    def forward(self, noise: Tensor, num_steps: Optional[int] = None) -> Tensor:
+        device = noise.device
+        num_steps = default(num_steps, self.num_steps)  # type: ignore
+        assert exists(num_steps), "Parameter `num_steps` must be provided"
         # Compute sigmas using schedule
         sigmas = self.sigma_schedule(num_steps, device)
-        # Sample from first sigma distribution
-        x = sigmas[0] * x
-        # Compute gammas
-        gammas = torch.where(
-            (sigmas >= self.s_tmin) & (sigmas <= self.s_tmax),
-            min(self.s_churn / num_steps, sqrt(2) - 1),
-            0.0,
-        )
-        # Denoise x
-        for i in range(num_steps - 1):
-            x = self.step(x, sigma=sigmas[i], sigma_next=sigmas[i + 1], gamma=gammas[i])  # type: ignore # noqa
-
+        # Sample using sampler
+        x = self.sampler(noise, fn=self.denoise_fn, sigmas=sigmas, num_steps=num_steps)
         x = x.clamp(-1.0, 1.0)
         return x

@@ -217,7 +298,7 @@ def __init__(
         *,
         num_steps: int,
         num_resamples: int,
-        sigma_schedule: SigmaSchedule,
+        sigma_schedule: Schedule,
         s_tmin: float = 0,
         s_tmax: float = float("inf"),
         s_churn: float = 0.0,
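As a closing aside (not part of the commit), the sigma split in `ADPM2Sampler.step` decomposes each noise decrement into a deterministic part (`sigma_down`) and a stochastic part (`sigma_up`) such that `sigma_down ** 2 + sigma_up ** 2 == sigma_next ** 2`. A runnable sketch with illustrative values:

```py
from math import isclose, sqrt

sigma, sigma_next, rho = 1.0, 0.5, 1.0  # Illustrative values

# Same formulas as ADPM2Sampler.step
sigma_up = sqrt(sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2)
sigma_down = sqrt(sigma_next ** 2 - sigma_up ** 2)
sigma_mid = ((sigma ** (1 / rho) + sigma_down ** (1 / rho)) / 2) ** rho

# Deterministic and stochastic parts recombine to the target noise level
assert isclose(sigma_up ** 2 + sigma_down ** 2, sigma_next ** 2)
print(sigma_up, sigma_down, sigma_mid)  # ~0.433, 0.25, 0.625
```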
