Fix the README, add stricter input asserts, allow the mask to be passed in with shape (b, h, w), and plan to put more work into the repository now that a researcher has seen good results

lucidrains · lucidrains · commit fd6ba36484c8 · 2023-02-16T09:41:17.000-08:00
diff --git a/README.md b/README.md
@@ -20,6 +20,8 @@ from med_seg_diff_pytorch import Unet, MedSegDiff
 model = Unet(
     dim = 64,
     image_size = 128,
+    mask_channels = 1,          # segmentation has 1 channel
+    input_img_channels = 3,     # input images have 3 channels
     dim_mults = (1, 2, 4, 8)
 )
 
@@ -28,7 +30,7 @@ diffusion = MedSegDiff(
     timesteps = 1000
 ).cuda()
 
-segmented_imgs = torch.rand(8, 3, 128, 128)  # inputs are normalized from 0 to 1
+segmented_imgs = torch.rand(8, 1, 128, 128)  # inputs are normalized from 0 to 1
 input_imgs = torch.rand(8, 3, 128, 128)
 
 loss = diffusion(segmented_imgs, input_imgs)
@@ -56,6 +58,8 @@ If you want to add in self condition where we condition with the mask we have so
 
 - [x] some basic training code, with Trainer taking in custom dataset tailored for medical image formats  - thanks to <a href="https://github.com/isamu-isozaki">@isamu-isozaki</a>
 
+- [ ] full-blown transformer of any depth in the middle, as done in <a href="https://arxiv.org/abs/2301.11093">simple diffusion</a>
+
 ## Citations
 
 ```bibtex
diff --git a/med_seg_diff_pytorch/med_seg_diff_pytorch.py b/med_seg_diff_pytorch/med_seg_diff_pytorch.py
@@ -238,8 +238,8 @@ def __init__(
         self,
         dim,
         image_size,
-        mask_channels=1,
-        input_img_channels=3,
+        mask_channels = 1,
+        input_img_channels = 3,
         init_dim = None,
         out_dim = None,
         dim_mults: tuple = (1, 2, 4, 8),
@@ -258,6 +258,7 @@ def __init__(
         self.input_img_channels = input_img_channels
         self.mask_channels = mask_channels
         self.self_condition = self_condition
+
         output_channels = mask_channels
         mask_channels = mask_channels * (2 if self_condition else 1)
 
@@ -699,11 +700,21 @@ def p_losses(self, x_start, t, cond, noise = None):
         return F.mse_loss(model_out, target)
 
     def forward(self, img, cond_img, *args, **kwargs):
+        if img.ndim == 3:
+            img = rearrange(img, 'b h w -> b 1 h w')
+
+        if cond_img.ndim == 3:
+            cond_img = rearrange(cond_img, 'b h w -> b 1 h w')
+
         device = self.device
         img, cond_img = img.to(device), cond_img.to(device)
 
-        b, c, h, w, device, img_size, = *img.shape, img.device, self.image_size
+        b, c, h, w, device, img_size, img_channels, mask_channels = *img.shape, img.device, self.image_size, self.input_img_channels, self.mask_channels
+
         assert h == img_size and w == img_size, f'height and width of image must be {img_size}'
+        assert cond_img.shape[1] == img_channels, f'your input medical must have {img_channels} channels'
+        assert img.shape[1] == mask_channels, f'the segmented image must have {mask_channels} channels'
+
         times = torch.randint(0, self.num_timesteps, (b,), device = device).long()
 
         img = normalize_to_neg_one_to_one(img)
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'med-seg-diff-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.1.1',
+  version = '0.1.2',
   license='MIT',
   description = 'MedSegDiff - SOTA medical image segmentation - Pytorch',
   author = 'Phil Wang',