
Commit ae42f48

prepare so that unet can work with a channel of one, and also make it so image size is hard coded in diffusion class. preparing for training on protein distograms
1 parent 5989f4c commit ae42f48
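
For context, a minimal sketch of how the changed API could be used for single-channel data such as protein distograms. The `dim = 64` value, the explicit `out_dim = 1`, and the tensor shapes are illustrative assumptions, not part of this commit:

```python
import torch
from denoising_diffusion_pytorch import Unet, GaussianDiffusion

# a single-channel Unet, e.g. for protein distograms;
# `channels` is the argument introduced by this commit
model = Unet(
    dim = 64,      # illustrative base dimension (assumption)
    out_dim = 1,   # match the single input channel (assumption)
    channels = 1
)

# image size is now fixed at construction time rather than
# passed to `sample`
diffusion = GaussianDiffusion(
    model,
    image_size = 128,
    timesteps = 1000,
    loss_type = 'l1'
)

# one training step on a batch of four 1-channel 128x128 maps
training_maps = torch.randn(4, 1, 128, 128)
loss = diffusion(training_maps)
loss.backward()
```

Note that `sample` in this commit still builds a `(batch_size, 3, image_size, image_size)` noise shape, so single-channel sampling would need that call adjusted separately.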

File tree

3 files changed, +42 -24 lines

README.md

Lines changed: 16 additions & 16 deletions
@@ -27,6 +27,7 @@ model = Unet(
 
 diffusion = GaussianDiffusion(
     model,
+    image_size = 128,
     timesteps = 1000,   # number of steps
     loss_type = 'l1'    # L1 or L2
 )
@@ -36,7 +37,7 @@ loss = diffusion(training_images)
 loss.backward()
 # after a lot of training
 
-sampled_images = diffusion.sample(128, batch_size = 4)
+sampled_images = diffusion.sample(batch_size = 4)
 sampled_images.shape # (4, 3, 128, 128)
 ```
 
@@ -52,14 +53,14 @@ model = Unet(
 
 diffusion = GaussianDiffusion(
     model,
+    image_size = 128,
     timesteps = 1000,   # number of steps
     loss_type = 'l1'    # L1 or L2
 ).cuda()
 
 trainer = Trainer(
     diffusion,
     'path/to/your/images',
-    image_size = 128,
     train_batch_size = 32,
     train_lr = 2e-5,
     train_num_steps = 700000,         # total training steps
@@ -77,23 +78,22 @@ Samples and model checkpoints will be logged to `./results` periodically
 
 ```bibtex
 @misc{ho2020denoising,
-    title={Denoising Diffusion Probabilistic Models},
-    author={Jonathan Ho and Ajay Jain and Pieter Abbeel},
-    year={2020},
-    eprint={2006.11239},
-    archivePrefix={arXiv},
-    primaryClass={cs.LG}
+    title = {Denoising Diffusion Probabilistic Models},
+    author = {Jonathan Ho and Ajay Jain and Pieter Abbeel},
+    year = {2020},
+    eprint = {2006.11239},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.LG}
 }
 ```
 
 ```bibtex
-@inproceedings{
-anonymous2021improved,
-title={Improved Denoising Diffusion Probabilistic Models},
-author={Anonymous},
-booktitle={Submitted to International Conference on Learning Representations},
-year={2021},
-url={https://openreview.net/forum?id=-NEXDKk8gZ},
-note={under review}
+@inproceedings{anonymous2021improved,
+    title = {Improved Denoising Diffusion Probabilistic Models},
+    author = {Anonymous},
+    booktitle = {Submitted to International Conference on Learning Representations},
+    year = {2021},
+    url = {https://openreview.net/forum?id=-NEXDKk8gZ},
+    note = {under review}
 }
 ```
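
A brief sketch of the `Trainer` change in the README above: `image_size` is no longer passed to the trainer, which now reads it from the diffusion model (the path and hyperparameters follow the README's example):

```python
trainer = Trainer(
    diffusion,                # the GaussianDiffusion instance from above
    'path/to/your/images',
    train_batch_size = 32,
    train_lr = 2e-5
)

# the trainer now takes its image size from the diffusion model
assert trainer.image_size == diffusion.image_size  # 128
```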

denoising_diffusion_pytorch/denoising_diffusion_pytorch.py

Lines changed: 25 additions & 7 deletions
@@ -181,9 +181,16 @@ def forward(self, x):
 # model
 
 class Unet(nn.Module):
-    def __init__(self, dim, out_dim = None, dim_mults=(1, 2, 4, 8), groups = 8):
+    def __init__(
+        self,
+        dim,
+        out_dim = None,
+        dim_mults=(1, 2, 4, 8),
+        groups = 8,
+        channels = 3
+    ):
         super().__init__()
-        dims = [3, *map(lambda m: dim * m, dim_mults)]
+        dims = [channels, *map(lambda m: dim * m, dim_mults)]
         in_out = list(zip(dims[:-1], dims[1:]))
 
         self.time_pos_emb = SinusoidalPosEmb(dim)
@@ -279,8 +286,17 @@ def cosine_beta_schedule(timesteps, s = 0.008):
     return np.clip(betas, a_min = 0, a_max = 0.999)
 
 class GaussianDiffusion(nn.Module):
-    def __init__(self, denoise_fn, timesteps=1000, loss_type='l1', betas = None):
+    def __init__(
+        self,
+        denoise_fn,
+        *,
+        image_size,
+        timesteps = 1000,
+        loss_type = 'l1',
+        betas = None
+    ):
         super().__init__()
+        self.image_size = image_size
         self.denoise_fn = denoise_fn
 
         if exists(betas):
@@ -371,7 +387,8 @@ def p_sample_loop(self, shape):
         return img
 
     @torch.no_grad()
-    def sample(self, image_size, batch_size = 16):
+    def sample(self, batch_size = 16):
+        image_size = self.image_size
         return self.p_sample_loop((batch_size, 3, image_size, image_size))
 
     @torch.no_grad()
@@ -415,7 +432,8 @@ def p_losses(self, x_start, t, noise = None):
         return loss
 
     def forward(self, x, *args, **kwargs):
-        b, *_, device = *x.shape, x.device
+        b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size
+        assert h == img_size and w == img_size, f'height and width of image must be {img_size}'
         t = torch.randint(0, self.num_timesteps, (b,), device=device).long()
         return self.p_losses(x, t, *args, **kwargs)
 
@@ -467,7 +485,7 @@ def __init__(
         self.step_start_ema = step_start_ema
 
         self.batch_size = train_batch_size
-        self.image_size = image_size
+        self.image_size = diffusion_model.image_size
         self.gradient_accumulate_every = gradient_accumulate_every
         self.train_num_steps = train_num_steps
 
@@ -528,7 +546,7 @@ def train(self):
             if self.step != 0 and self.step % SAVE_AND_SAMPLE_EVERY == 0:
                 milestone = self.step // SAVE_AND_SAMPLE_EVERY
                 batches = num_to_groups(36, self.batch_size)
-                all_images_list = list(map(lambda n: self.ema_model.sample(self.image_size, batch_size=n), batches))
+                all_images_list = list(map(lambda n: self.ema_model.sample(batch_size=n), batches))
                 all_images = torch.cat(all_images_list, dim=0)
                 utils.save_image(all_images, str(RESULTS_FOLDER / f'sample-{milestone}.png'), nrow=6)
                 self.save(milestone)
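
As a quick sanity check of the behavioral changes in this file, a hedged sketch (the `dim` value and tensor shapes are illustrative assumptions): `image_size` is now keyword-only, `forward` asserts the input's spatial size, and `sample` takes only a batch size.

```python
import torch
from denoising_diffusion_pytorch import Unet, GaussianDiffusion

model = Unet(dim = 64)

# `image_size` is keyword-only after this commit; passing it
# positionally raises a TypeError
diffusion = GaussianDiffusion(model, image_size = 128)

# `forward` now validates height and width against `image_size`
wrong_size = torch.randn(2, 3, 64, 64)
try:
    diffusion(wrong_size)
except AssertionError as e:
    print(e)  # height and width of image must be 128

# `sample` no longer takes an image_size argument
images = diffusion.sample(batch_size = 4)
images.shape  # (4, 3, 128, 128)
```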

setup.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.5.2',
+  version = '0.6.0',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',
