more back and forth with author through emails

lucidrains · lucidrains · commit f095f571bdf2 · 2023-08-14T08:21:04.000-07:00
diff --git a/perfusion_pytorch/perfusion.py b/perfusion_pytorch/perfusion.py
@@ -57,8 +57,9 @@ def __init__(
         # will smooth both concept and superclass token inputs
 
         self.register_buffer('initted', torch.zeros(num_finetune_prompts).bool())
-        self.register_buffer('ema_concept_text_enc', torch.zeros(num_finetune_prompts, dim_input))
-        self.register_buffer('ema_superclass_text_enc', torch.zeros(num_finetune_prompts, dim_input))
+        self.register_buffer('ema_concept_text_encs', torch.zeros(num_finetune_prompts, dim_input))
+        self.register_buffer('ema_superclass_text_encs', torch.zeros(num_finetune_prompts, dim_input))
+        self.register_buffer('superclass_outputs', torch.zeros(num_finetune_prompts, dim_output))
 
         # C in the paper, inverse precomputed
 
@@ -109,6 +110,8 @@ def forward(
         superclass_text_enc = text_enc_with_superclass[batch_indices, concept_indices]
         superclass_text_enc = rearrange(superclass_text_enc, 'b 1 d -> b d')
 
+        superclass_output = einsum('b i, o i -> b o', superclass_text_enc, weights)
+
         # only if training, and if prompt ids are given
         # do exponential smoothing of the inputs, both concept and superclass
 
@@ -120,8 +123,13 @@ def forward(
             initted = rearrange(initted, 'b -> b 1')
             all_initted = initted.all()
 
-            ema_concept_text_enc = self.ema_concept_text_enc[prompt_ids]
-            ema_superclass_text_enc = self.ema_superclass_text_enc[prompt_ids]
+            ema_concept_text_enc = self.ema_concept_text_encs[prompt_ids]
+            ema_superclass_text_enc = self.ema_superclass_text_encs[prompt_ids]
+
+            # for keys, the superclass output (o*) is stored the first time a prompt id is seen
+            # and is never optimized afterwards
+
+            stored_superclass_output = self.superclass_outputs[prompt_ids]
 
             # if any in the batch is not initialized, initialize
 
@@ -138,6 +146,12 @@ def forward(
                     superclass_text_enc
                 )
 
+                superclass_output = torch.where(
+                    initted,
+                    stored_superclass_output,
+                    superclass_output
+                )
+
             # exponential moving average of both concept and superclass
 
             concept_text_enc = ema_concept_text_enc * decay + concept_text_enc * (1. - decay)
@@ -147,20 +161,19 @@ def forward(
 
             if not all_initted:
                 self.initted[prompt_ids] = True
-                self.ema_concept_text_enc[prompt_ids] = ema_concept_text_enc
-                self.ema_superclass_text_enc[prompt_ids] = ema_superclass_text_enc
+                self.ema_concept_text_encs[prompt_ids] = ema_concept_text_enc
+                self.ema_superclass_text_encs[prompt_ids] = ema_superclass_text_enc
+                self.superclass_outputs[prompt_ids] = superclass_output
 
         # take care of the output
         # for the keys, make sure to turn off gradients as it is 'locked'
 
-        superclass_text_enc_output = einsum('b i, o i -> b o', superclass_text_enc, weights)
-
         if self.is_key_proj:
-            superclass_text_enc_output = superclass_text_enc_output.detach()
+            superclass_output = superclass_output.detach()
 
         # make it easier to match with paper
 
-        i, o, W = concept_text_enc, superclass_text_enc_output, weights
+        i, o, W = concept_text_enc, superclass_output, weights
 
         # main contribution eq (3)
 
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'perfusion-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.0.8',
+  version = '0.0.9',
   license='MIT',
   description = 'Perfusion - Pytorch',
   author = 'Phil Wang',