add the general logic for the zero shot mask weighting of loss

lucidrains · lucidrains · commit aeeb9eec3d6c · 2023-08-16T08:34:52.000-07:00
diff --git a/README.md b/README.md
@@ -80,12 +80,13 @@ values = wrapped_to_values(
 
 ## Todo
 
-- [ ] add the zero-shot masking of concept proposed in paper
 - [ ] offer a way to combine separately learned concepts from multiple `Rank1EditModule` into one for inference
 - [ ] handle rank-1 update for multiple concepts
     - [x] handle training with multiple concepts
     - [ ] handle multiple concepts in one prompt at inference - summation of the sigmoid term + outputs
 - [ ] offer a magic function that automatically tries to wire up the cross attention by looking for appropriately named `nn.Linear` and auto-inferring which ones are keys or values
+
+- [x] add the zero-shot masking of concept proposed in paper
 - [x] take care of the function that takes in the dataset and text encoder and precomputes the covariance matrix needed for the rank-1 update
 - [x] instead of having the researcher worry about different learning rates, offer the fractional gradient trick from other paper (to learn the concept embedding)
 
diff --git a/perfusion_pytorch/perfusion.py b/perfusion_pytorch/perfusion.py
@@ -81,6 +81,36 @@ def return_text_enc_with_concept_and_superclass(
 
     return concept_text_enc, concept_indices, superclass_text_enc
 
+# loss weighted by the mask
+
+@beartype
+def loss_fn_weighted_by_mask(
+    pred: FloatTensor,
+    target: FloatTensor,
+    mask: FloatTensor,
+    normalized_mask_min_value = 0.
+):
+    """
+    mean squared error between `pred` and `target`, weighted per pixel by `mask`
+
+    pred, target - (b, c, h, w), per the einops pattern used below
+    mask - non-negative weights, (b, h, w) or (b, 1, h, w)
+    normalized_mask_min_value - floor applied to the mask after it has been divided by its
+        per-sample max; a value above 0 gives pixels with a zero mask a non-zero weight
+
+    returns the scalar mean of the weighted loss over batch and pixels
+    raises AssertionError on mismatched shapes or negative mask values
+    """
+
+    assert mask.shape[-2:] == pred.shape[-2:] == target.shape[-2:]
+    assert mask.shape[0] == pred.shape[0] == target.shape[0]
+
+    assert (mask.amin() >= 0.).all(), 'mask should not have values below 0'
+
+    if mask.ndim == 4:
+        assert mask.shape[1] == 1
+        mask = rearrange(mask, 'b 1 h w -> b h w')
+
+    loss = F.mse_loss(pred, target, reduction = 'none')
+
+    # einops `reduce` requires the reduction to be named - average the squared error over channels
+
+    loss = reduce(loss, 'b c h w -> b h w', 'mean')
+
+    # normalize mask by its max over the whole (h, w) map of each sample
+    # a max over the last dim only would rescale every row independently
+    # the clamp guards against division by zero for an all zeros mask
+
+    mask_max = mask.amax(dim = (-2, -1), keepdim = True).clamp(min = 1e-5)
+
+    normalized_mask = mask / mask_max
+    normalized_mask = normalized_mask.clamp(min = normalized_mask_min_value)
+
+    loss = loss * normalized_mask
+
+    return loss.mean()
+
 # a module that wraps the keys and values projection of the cross attentions to text encodings
 
 class Rank1EditModule(Module):
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'perfusion-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.0.21',
+  version = '0.0.22',
   license='MIT',
   description = 'Perfusion - Pytorch',
   author = 'Phil Wang',