@@ -129,11 +129,11 @@ def __init__(
         self.register_buffer('initted', torch.zeros(num_concepts, 1).bool())
         self.register_buffer('ema_concept_text_encs', torch.zeros(num_concepts, dim_input))

-        # superclass outputs - only optimized for values, but not keys
+        # concept outputs - only optimized for values, but not keys

         self.is_key_proj = is_key_proj # will lock the output to the super-class, and turn off gradients

-        self.superclass_output = nn.Parameter(torch.zeros(num_concepts, dim_output), requires_grad = not is_key_proj)
+        self.concept_output = nn.Parameter(torch.zeros(num_concepts, dim_output), requires_grad = not is_key_proj)

         # C in the paper, inverse precomputed

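For context on the buffer that trailing comment refers to: in the Perfusion / ROME line of work, C is the uncentered covariance of input text encodings, E[i iᵀ], estimated once from the frozen encoder and inverted up front. A minimal sketch of that precomputation, with an illustrative function name and a ridge term that are assumptions, not part of this commit:

```python
import torch

# hypothetical helper, not from this commit: estimate C = E[i i^T] over text
# encodings from the frozen encoder, then invert it once
def precompute_C_inv(text_encs: torch.Tensor, eps: float = 1e-4) -> torch.Tensor:
    # text_encs: (num_samples, dim_input)
    C = text_encs.t() @ text_encs / text_encs.shape[0]
    # small ridge term (an assumption) keeps the inverse well conditioned
    return torch.linalg.inv(C + eps * torch.eye(C.shape[-1]))
```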
@@ -143,7 +143,7 @@ def parameters(self):
         if self.is_key_proj:
             return []

-        return [self.superclass_outputs]
+        return [self.concept_output]

     @beartype
     def forward(
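The override above keeps key projections out of the optimizer entirely, matching `requires_grad = not is_key_proj` from the constructor. A self-contained toy reproducing that pattern (class name and sizes invented for illustration):

```python
import torch
from torch import nn

# toy reproduction of the pattern above: both modules own a concept_output,
# but only the value projection reports it as trainable
class ToyRank1(nn.Module):
    def __init__(self, num_concepts, dim_output, is_key_proj):
        super().__init__()
        self.is_key_proj = is_key_proj
        self.concept_output = nn.Parameter(
            torch.zeros(num_concepts, dim_output),
            requires_grad = not is_key_proj
        )

    def parameters(self):
        if self.is_key_proj:
            return []  # keys stay locked to the superclass output
        return [self.concept_output]

keys, values = ToyRank1(2, 8, True), ToyRank1(2, 8, False)
assert len(list(keys.parameters())) == 0
assert len(list(values.parameters())) == 1
```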
@@ -209,15 +209,15 @@ def forward(
         if not initted:
             assert exists(superclass_output), 'text_enc_with_superclass must be passed in for the first batch'

-            # for the prompt ids not initialized yet, hard copy over the initial superclass outputs
-            self.superclass_output[concept_id].data.copy_(superclass_output)
+            # init concept output with superclass output - fixed for keys, learned for values
+            self.concept_output[concept_id].data.copy_(superclass_output)

-        elif exists(superclass_output):
+        elif exists(superclass_output) and self.is_key_proj:
             # if text enc with superclass is passed in for more than 1 batch
-            # just take the opportunity to exponentially average it a bit more
+            # just take the opportunity to exponentially average it a bit more for the keys, whose concept output stays fixed to the superclass

-            ema_superclass_output = self.superclass_output * decay + superclass_output * (1. - decay)
-            self.superclass_output[concept_id].data.copy_(ema_superclass_output)
+            ema_concept_output = self.concept_output[concept_id] * decay + superclass_output * (1. - decay)
+            self.concept_output[concept_id].data.copy_(ema_concept_output)

         # if any in the batch is not initialized, initialize

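Standalone, the branch above amounts to: hard-copy the superclass output the first time a concept is seen, then (for keys only) keep nudging it with an exponential moving average. A small runnable sketch, with `decay = 0.995` assumed as a typical value rather than taken from this commit:

```python
import torch

decay = 0.995  # assumed value; the actual decay is configured elsewhere in the module
concept_output = torch.zeros(4)     # one concept's slot, uninitialized
superclass_output = torch.randn(4)

# first batch: hard copy, so the concept starts exactly at the superclass output
concept_output.copy_(superclass_output)

# later batches (keys only): exponentially average toward fresh superclass outputs
fresh_superclass_output = torch.randn(4)
concept_output.copy_(concept_output * decay + fresh_superclass_output * (1. - decay))
```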
@@ -234,13 +234,13 @@ def forward(

         if not initted:
             self.initted[concept_id].data.copy_(Tensor([True]))
-            self.ema_concept_text_encs[concept_id].data.copy_(ema_concept_text_enc)
+            self.ema_concept_text_encs[concept_id].data.copy_(concept_text_enc)
         else:
             assert initted, 'you have not initialized or trained this module yet'

         # make it easier to match with paper

-        i, o, W = concept_text_enc, self.superclass_output[concept_id], weights
+        i, o, W = self.ema_concept_text_encs[concept_id], self.concept_output[concept_id], weights

         # main contribution eq (3)

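For readers matching this against the paper: eq (3) is a gated, ROME-style rank-1 edit of the projection. Below is a minimal ungated sketch under the variable roles set up above (`i`, `o`, `W`, plus a precomputed `C_inv`); the sigmoid gating and batching details of the actual module are omitted, and the function name is illustrative:

```python
import torch

# minimal ungated sketch of the rank-1 edit (all names illustrative)
def rank1_edit_forward(x, W, C_inv, i, o):
    # x:     (batch, seq, dim_in)  incoming text encodings
    # W:     (dim_out, dim_in)     frozen projection weight
    # C_inv: (dim_in, dim_in)      precomputed inverse input covariance
    # i:     (dim_in,)             (EMA of the) concept input encoding
    # o:     (dim_out,)            concept output: locked for keys, learned for values
    Ci = C_inv @ i                      # C^-1 i
    i_energy = i @ Ci                   # scalar i^T C^-1 i
    coeff = (x @ Ci) / i_energy         # (batch, seq) similarity to the concept input
    residual = o - W @ i                # rank-1 direction o* - W i*
    return x @ W.t() + coeff.unsqueeze(-1) * residual
```

At `x = i` this returns exactly `o`, which is the key-locking behavior: the concept's key maps to the superclass key while unrelated tokens are nearly untouched.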