@@ -449,12 +449,12 @@ def forward(
             return self.dropout(t)
 
         if self.dropout_type == 'row':
-            batch, row, _, _ = t.shape
-            ones_shape = (batch, row, 1, 1)
+            batch, _, col, dim = t.shape
+            ones_shape = (batch, 1, col, dim)
 
         elif self.dropout_type == 'col':
-            batch, _, col, _ = t.shape
-            ones_shape = (batch, 1, col, 1)
+            batch, row, _, dim = t.shape
+            ones_shape = (batch, row, 1, dim)
 
         ones = t.new_ones(ones_shape)
         dropped = self.dropout(ones)
@@ -624,9 +624,8 @@ def forward(
         out = self.to_out_norm(out)
 
         out_gate = self.out_gate(x).sigmoid()
-        out = out * out_gate
 
-        return self.to_out(out)
+        return self.to_out(out) * out_gate
 
 # there are two types of attention in this paper, triangle and attention-pair-bias
 # they differ by how the attention bias is computed
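This hunk moves the sigmoid gate so it scales the projected output rather than the tensor fed into the projection. A minimal sketch of that gated output head, assuming square projections and module names chosen only for illustration:

import torch
from torch import nn

class GatedOutputHead(nn.Module):
    # illustrative stand-in, not the repository's module
    def __init__(self, dim):
        super().__init__()
        self.out_gate = nn.Linear(dim, dim, bias = False)
        self.to_out = nn.Linear(dim, dim, bias = False)

    def forward(self, x, out):
        out_gate = self.out_gate(x).sigmoid()
        # gate is applied after the final projection, matching the change above
        return self.to_out(out) * out_gate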
@@ -1316,8 +1315,8 @@ def __init__(
         # final projection of mean pooled repr -> out
 
         self.to_out = nn.Sequential(
-            LinearNoBias(dim, dim_pairwise),
-            nn.ReLU()
+            nn.ReLU(),
+            LinearNoBias(dim, dim_pairwise)
         )
 
         self.layerscale = nn.Parameter(torch.zeros(dim_pairwise)) if layerscale_output else 1.
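After the reorder, the projection applies the nonlinearity first and LinearNoBias last, so the head emits an unconstrained linear readout in dim_pairwise that is then scaled by the zero-initialised layerscale parameter. A small usage sketch under assumed shapes; the pooled-input name is hypothetical and LinearNoBias is stood in by nn.Linear with bias = False:

import torch
from torch import nn

dim, dim_pairwise = 64, 128

to_out = nn.Sequential(
    nn.ReLU(),                                    # nonlinearity first
    nn.Linear(dim, dim_pairwise, bias = False)    # then project to the pairwise dimension
)

layerscale = nn.Parameter(torch.zeros(dim_pairwise))   # zero init, so the branch starts as a no-op

pooled = torch.randn(2, dim)            # assumed mean-pooled representation
update = to_out(pooled) * layerscale    # contribution is learned during training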