Fix variance sampling and add tests (#8)

mathpluscode · web-flow · commit f3b2c3102493 · 2023-05-06T18:32:11.000+01:00
diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
@@ -1,9 +1,17 @@
 name: unit-test
 
 on:
-  pull_request:
+  schedule:
+    - cron: "0 0 * * *"
+  workflow_dispatch:
+  # Trigger the workflow on push or pull request,
+  # but only for the main branch
   push:
-    branches: [main]
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
 
 jobs:
   build:
diff --git a/docker/requirements.txt b/docker/requirements.txt
@@ -6,6 +6,7 @@ hydra-core==1.3.1
 optax==0.1.4
 pandas==1.5.3
 pre-commit==3.0.4
+protobuf==3.20.3 # https://github.com/tensorflow/datasets/issues/4858
 pytest-cov==4.0.0
 pytest-xdist==3.1.0
 pytest==7.2.1
diff --git a/imgx/conf/config_amos_diffusion.yaml b/imgx/conf/config_amos_diffusion.yaml
@@ -16,7 +16,7 @@ task:
   name: "diffusion" # segmentation, diffusion
   diffusion:
     num_timesteps: 5
-    num_timesteps_beta: 1000
+    num_timesteps_beta: 1001
     beta:
       beta_schedule: "linear" # linear, quadradic, cosine, warmup10, warmup50
       beta_start: 0.0001
diff --git a/imgx/conf/config_amos_segmentation.yaml b/imgx/conf/config_amos_segmentation.yaml
@@ -16,7 +16,7 @@ task:
   name: "segmentation" # segmentation, diffusion
   diffusion:
     num_timesteps: 5
-    num_timesteps_beta: 1000
+    num_timesteps_beta: 1001
     beta:
       beta_schedule: "linear" # linear, quadradic, cosine, warmup10, warmup50
       beta_start: 0.0001
diff --git a/imgx/conf/config_pelvic_diffusion.yaml b/imgx/conf/config_pelvic_diffusion.yaml
@@ -16,7 +16,7 @@ task:
   name: "diffusion" # segmentation, diffusion
   diffusion:
     num_timesteps: 5
-    num_timesteps_beta: 1000
+    num_timesteps_beta: 1001
     beta:
       beta_schedule: "linear" # linear, quadradic, cosine, warmup10, warmup50
       beta_start: 0.0001
diff --git a/imgx/conf/config_pelvic_segmentation.yaml b/imgx/conf/config_pelvic_segmentation.yaml
@@ -16,7 +16,7 @@ task:
   name: "segmentation" # segmentation, diffusion
   diffusion:
     num_timesteps: 5
-    num_timesteps_beta: 1000
+    num_timesteps_beta: 1001
     beta:
       beta_schedule: "linear" # linear, quadradic, cosine, warmup10, warmup50
       beta_start: 0.0001
diff --git a/imgx/diffusion/gaussian_diffusion.py b/imgx/diffusion/gaussian_diffusion.py
@@ -10,25 +10,19 @@
 import haiku as hk
 import jax.numpy as jnp
 import jax.random
-import numpy as np
 
 from imgx import EPS
+from imgx.diffusion.variance_schedule import (
+    DiffusionBetaSchedule,
+    downsample_beta_schedule,
+    get_beta_schedule,
+)
 from imgx.metric.distribution import (
     discretized_gaussian_log_likelihood,
     normal_kl,
 )
 
 
-class DiffusionBetaSchedule(enum.Enum):
-    """Class to define beta schedule."""
-
-    LINEAR = enum.auto()
-    QUADRADIC = enum.auto()
-    COSINE = enum.auto()
-    WARMUP10 = enum.auto()
-    WARMUP50 = enum.auto()
-
-
 class DiffusionModelOutputType(enum.Enum):
     """Class to define model's output meaning.
 
@@ -88,90 +82,6 @@ def extract_and_expand(
     return jnp.expand_dims(arr[t], axis=tuple(range(1, ndim)))
 
 
-def get_beta_schedule(
-    num_timesteps: int,
-    beta_schedule: DiffusionBetaSchedule,
-    beta_start: float,
-    beta_end: float,
-) -> jnp.ndarray:
-    """Get variance (beta) schedule for q(x_t | x_{t-1}).
-
-        TODO: open-source code used float64 for beta.
-
-    Args:
-        num_timesteps: number of time steps in total, T.
-        beta_schedule: schedule for beta.
-        beta_start: beta for t=0.
-        beta_end: beta for t=T.
-
-    Raises:
-        ValueError: for unknown schedule.
-    """
-    if beta_schedule == DiffusionBetaSchedule.LINEAR:
-        return jnp.linspace(
-            beta_start,
-            beta_end,
-            num_timesteps,
-        )
-    if beta_schedule == DiffusionBetaSchedule.QUADRADIC:
-        return (
-            jnp.linspace(
-                beta_start**0.5,
-                beta_end**0.5,
-                num_timesteps,
-            )
-            ** 2
-        )
-    if beta_schedule == DiffusionBetaSchedule.COSINE:
-
-        def alphas_cumprod(t: float) -> float:
-            """Eq 17 in https://arxiv.org/abs/2102.09672."""
-            return np.cos((t + 0.008) / 1.008 * np.pi / 2) ** 2
-
-        max_beta = 0.999
-        betas = []
-        for i in range(num_timesteps):
-            t1 = i / num_timesteps
-            t2 = (i + 1) / num_timesteps
-            beta = min(1 - alphas_cumprod(t2) / alphas_cumprod(t1), max_beta)
-            betas.append(beta)
-        return jnp.array(betas)
-
-    if beta_schedule == DiffusionBetaSchedule.WARMUP10:
-        num_timesteps_warmup = max(num_timesteps // 10, 1)
-        betas_warmup = (
-            jnp.linspace(
-                beta_start**0.5,
-                beta_end**0.5,
-                num_timesteps_warmup,
-            )
-            ** 2
-        )
-        return jnp.concatenate(
-            [
-                betas_warmup,
-                jnp.ones((num_timesteps - num_timesteps_warmup,)) * beta_end,
-            ]
-        )
-    if beta_schedule == DiffusionBetaSchedule.WARMUP50:
-        num_timesteps_warmup = max(num_timesteps // 2, 1)
-        betas_warmup = (
-            jnp.linspace(
-                beta_start**0.5,
-                beta_end**0.5,
-                num_timesteps_warmup,
-            )
-            ** 2
-        )
-        return jnp.concatenate(
-            [
-                betas_warmup,
-                jnp.ones((num_timesteps - num_timesteps_warmup,)) * beta_end,
-            ]
-        )
-    raise ValueError(f"Unknown beta_schedule {beta_schedule}.")
-
-
 @dataclasses.dataclass
 class GaussianDiffusion(hk.Module):
     """Class for Gaussian diffusion sampling.
@@ -228,24 +138,17 @@ def __init__(
 
         # shape are all (T,)
         # corresponding to 0, ..., T-1, where 0 means one step
-        self.betas = get_beta_schedule(
+        betas = get_beta_schedule(
             num_timesteps=num_timesteps_beta,
             beta_schedule=beta_schedule,
             beta_start=beta_start,
             beta_end=beta_end,
         )
-        if num_timesteps_beta % num_timesteps != 0:
-            raise ValueError(
-                f"num_timesteps_beta={num_timesteps_beta} "
-                f"can't be evenly divided by num_timesteps={num_timesteps}."
-            )
-        if num_timesteps != num_timesteps_beta:
-            # adjust beta
-            step_scale = num_timesteps_beta // num_timesteps
-            alphas = 1.0 - self.betas
-            alphas_cumprod = jnp.cumprod(alphas)
-            alphas_cumprod = alphas_cumprod[step_scale - 1 :: step_scale]
-            self.betas = 1.0 - alphas_cumprod[1:] / alphas_cumprod[:-1]
+        self.betas = downsample_beta_schedule(
+            betas=betas,
+            num_timesteps=num_timesteps_beta,
+            num_timesteps_to_keep=num_timesteps,
+        )
 
         alphas = 1.0 - self.betas  # alpha_t
         self.alphas_cumprod = jnp.cumprod(alphas)  # \bar{alpha}_t
diff --git a/imgx/diffusion/variance_schedule.py b/imgx/diffusion/variance_schedule.py
@@ -0,0 +1,160 @@
+"""Variance schedule for diffusion models."""
+from __future__ import annotations
+
+import enum
+
+import numpy as np
+from jax import numpy as jnp
+
+
+class DiffusionBetaSchedule(enum.Enum):
+    """Class to define beta schedule.
+
+    Each member selects one branch of ``get_beta_schedule``.
+    """
+
+    # Betas evenly spaced between beta_start and beta_end.
+    LINEAR = enum.auto()
+    # sqrt(beta) evenly spaced between the square roots of the bounds,
+    # then squared.
+    # NOTE(review): the spelling "QUADRADIC" (sic) matches the "quadradic"
+    # option listed in the config comments; renaming it would presumably
+    # break existing configs -- confirm before changing.
+    QUADRADIC = enum.auto()
+    # Built from the cosine curve of Eq 17 in https://arxiv.org/abs/2102.09672,
+    # then rescaled to [beta_start, beta_end].
+    COSINE = enum.auto()
+    # Quadratic ramp over the first tenth of the steps (at least one step),
+    # then constant beta_end.
+    WARMUP10 = enum.auto()
+    # Quadratic ramp over the first half of the steps (at least one step),
+    # then constant beta_end.
+    WARMUP50 = enum.auto()
+
+
+def get_beta_schedule(
+    num_timesteps: int,
+    beta_schedule: DiffusionBetaSchedule,
+    beta_start: float,
+    beta_end: float,
+) -> jnp.ndarray:
+    """Get variance (beta) schedule for q(x_t | x_{t-1}).
+
+    Args:
+        num_timesteps: number of time steps in total, T.
+        beta_schedule: schedule for beta.
+        beta_start: beta for t=0.
+        beta_end: beta for t=T-1.
+
+    Returns:
+        Shape (num_timesteps,) array of beta values, for t=0, ..., T-1.
+        Values are non-decreasing when beta_start <= beta_end.
+
+    Raises:
+        ValueError: for unknown schedule.
+    """
+    if beta_schedule == DiffusionBetaSchedule.LINEAR:
+        return jnp.linspace(beta_start, beta_end, num_timesteps)
+    if beta_schedule == DiffusionBetaSchedule.QUADRADIC:
+        # Linear in sqrt(beta), so beta itself grows quadratically.
+        return (
+            jnp.linspace(beta_start**0.5, beta_end**0.5, num_timesteps) ** 2
+        )
+    if beta_schedule == DiffusionBetaSchedule.COSINE:
+        return _cosine_betas(
+            num_timesteps=num_timesteps,
+            beta_start=beta_start,
+            beta_end=beta_end,
+        )
+    if beta_schedule == DiffusionBetaSchedule.WARMUP10:
+        return _warmup_betas(
+            num_timesteps=num_timesteps,
+            num_timesteps_warmup=max(num_timesteps // 10, 1),
+            beta_start=beta_start,
+            beta_end=beta_end,
+        )
+    if beta_schedule == DiffusionBetaSchedule.WARMUP50:
+        return _warmup_betas(
+            num_timesteps=num_timesteps,
+            num_timesteps_warmup=max(num_timesteps // 2, 1),
+            beta_start=beta_start,
+            beta_end=beta_end,
+        )
+    raise ValueError(f"Unknown beta_schedule {beta_schedule}.")
+
+
+def _cosine_betas(
+    num_timesteps: int,
+    beta_start: float,
+    beta_end: float,
+) -> jnp.ndarray:
+    """Cosine schedule rescaled from [0, 1] to [beta_start, beta_end].
+
+    Args:
+        num_timesteps: number of time steps in total, T.
+        beta_start: value the raw beta 0 is mapped to.
+        beta_end: value the raw beta 1 is mapped to.
+
+    Returns:
+        Shape (num_timesteps,) array of beta values.
+    """
+    # Normalised time in [0, 1] for steps 0, ..., T-1.
+    # max() avoids a division by zero when there is a single step.
+    t = np.arange(num_timesteps) / max(num_timesteps - 1, 1)
+    # f(t) of Eq 17 in https://arxiv.org/abs/2102.09672, proportional to
+    # the cumulative product of alpha.
+    alphas_cumprod = np.cos((t + 0.008) / 1.008 * np.pi / 2) ** 2
+    # Per-step beta: each cumulative product is divided by the one of the
+    # *previous* step (starting from f(0)), not by a fixed constant.
+    # The first step has raw beta 0, i.e. it is mapped to beta_start.
+    betas = np.concatenate(
+        [[0.0], 1.0 - alphas_cumprod[1:] / alphas_cumprod[:-1]]
+    )
+    return jnp.array(betas) * (beta_end - beta_start) + beta_start
+
+
+def _warmup_betas(
+    num_timesteps: int,
+    num_timesteps_warmup: int,
+    beta_start: float,
+    beta_end: float,
+) -> jnp.ndarray:
+    """Quadratic ramp for the warm-up steps, then constant beta_end.
+
+    Args:
+        num_timesteps: number of time steps in total, T.
+        num_timesteps_warmup: number of leading steps used for the ramp.
+        beta_start: beta for t=0.
+        beta_end: beta at the end of the ramp and for all later steps.
+
+    Returns:
+        Shape (num_timesteps,) array of beta values.
+    """
+    betas_warmup = (
+        jnp.linspace(beta_start**0.5, beta_end**0.5, num_timesteps_warmup)
+        ** 2
+    )
+    betas_rest = jnp.ones((num_timesteps - num_timesteps_warmup,)) * beta_end
+    return jnp.concatenate([betas_warmup, betas_rest])
+
+
+def downsample_beta_schedule(
+    betas: jnp.ndarray,
+    num_timesteps: int,
+    num_timesteps_to_keep: int,
+) -> jnp.ndarray:
+    """Downsample beta schedule.
+
+    The first and last steps are kept together with evenly spaced steps in
+    between. The betas are recomputed from ratios of the cumulative alpha
+    products, so the cumulative product of the returned schedule matches
+    the one of the input schedule at every kept step.
+
+    Args:
+        betas: beta schedule, shape (num_timesteps,).
+            Values are in ascending order.
+        num_timesteps: number of time steps in total, T.
+        num_timesteps_to_keep: number of time steps to keep.
+
+    Returns:
+        Downsampled beta schedule, shape (num_timesteps_to_keep,).
+
+    Raises:
+        ValueError: if betas does not have shape (num_timesteps,), if
+            num_timesteps_to_keep is < 2 or > num_timesteps, or if
+            num_timesteps-1 is not a multiple of num_timesteps_to_keep-1.
+    """
+    if betas.shape != (num_timesteps,):
+        raise ValueError(
+            f"betas.shape ({betas.shape}) must be equal to "
+            f"(num_timesteps,)=({num_timesteps},)"
+        )
+    # Range checks come first: the modulo below divides by
+    # num_timesteps_to_keep-1, which is zero for num_timesteps_to_keep=1.
+    if num_timesteps_to_keep < 2:
+        raise ValueError(
+            f"num_timesteps_to_keep ({num_timesteps_to_keep}) must be >= 2."
+        )
+    if num_timesteps_to_keep > num_timesteps:
+        raise ValueError(
+            f"num_timesteps_to_keep ({num_timesteps_to_keep}) "
+            f"must be <= num_timesteps ({num_timesteps})"
+        )
+    if (num_timesteps - 1) % (num_timesteps_to_keep - 1) != 0:
+        raise ValueError(
+            f"num_timesteps-1={num_timesteps-1} can't be evenly divided by "
+            f"num_timesteps_to_keep-1={num_timesteps_to_keep-1}."
+        )
+    if num_timesteps_to_keep == num_timesteps:
+        return betas
+
+    step_scale = (num_timesteps - 1) // (num_timesteps_to_keep - 1)
+    # (num_timesteps_to_keep,), cumulative alpha products at the kept steps
+    alphas_cumprod = jnp.cumprod(1.0 - betas)[::step_scale]
+    # (num_timesteps_to_keep-1,), beta between consecutive kept steps
+    betas_rest = 1.0 - alphas_cumprod[1:] / alphas_cumprod[:-1]
+    # (num_timesteps_to_keep,), the first beta is kept unchanged
+    return jnp.concatenate([betas[:1], betas_rest])
diff --git a/tests/unit/test_diffusion_gaussian.py b/tests/unit/test_diffusion_gaussian.py
diff --git a/tests/unit/test_diffusion_variance_schedule.py b/tests/unit/test_diffusion_variance_schedule.py