Commit 39b2769

pianpwk authored and facebook-github-bot committed
[export] maybe fix conv.backward for joint graph export (#5450)
Summary:
Pull Request resolved: pytorch/executorch#5450
Differential Revision: D62910149
1 parent f6f1504 commit 39b2769
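
For context, a minimal sketch (not part of the commit) of the flow being fixed: export a model whose forward returns a scalar loss, then lower the ExportedProgram to a joint forward+backward graph. `TinyConv` is a hypothetical stand-in for the CIFAR10 net in the new test, and the sketch assumes `_export_forward_backward` is imported from `torch.export.experimental`, where the test suite uses it.

import torch
import torch.nn as nn
from torch.export import export
from torch.export.experimental import _export_forward_backward

class TinyConv(nn.Module):  # hypothetical module, for illustration only
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, labels):
        logits = self.conv(x).mean(dim=(2, 3))  # [N, 8] class scores
        return self.loss(logits, labels)

ep = export(TinyConv(), (torch.randn(2, 3, 8, 8), torch.zeros(2, dtype=torch.int64)))
joint_ep = _export_forward_backward(ep)  # traces conv's backward through its meta kernel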

File tree

3 files changed: +39 −11 lines

test/export/test_experimental.py
torch/_functorch/aot_autograd.py
torch/_meta_registrations.py

test/export/test_experimental.py
Lines changed: 34 additions & 0 deletions

@@ -327,6 +327,40 @@ def forward(self, x):
         )
         joint_ep = _export_forward_backward(ep)

+    def test_joint_cifar10_backwards(self) -> None:
+        import torch.nn as nn
+        import torch.nn.functional as F
+
+        # From PyTorch's CIFAR10 example:
+        # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
+        class Net(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.conv1 = nn.Conv2d(3, 6, 5)
+                self.pool = nn.MaxPool2d(2, 2)
+                self.conv2 = nn.Conv2d(6, 16, 5)
+                self.fc1 = nn.Linear(16 * 5 * 5, 120)
+                self.fc2 = nn.Linear(120, 84)
+                self.fc3 = nn.Linear(84, 10)
+                self.loss = nn.CrossEntropyLoss()
+
+            def forward(self, x, labels):
+                x = self.pool(F.relu(self.conv1(x)))
+                x = self.pool(F.relu(self.conv2(x)))
+                x = torch.flatten(x, 1)  # flatten all dimensions except batch
+                x = F.relu(self.fc1(x))
+                x = F.relu(self.fc2(x))
+                x = self.fc3(x)
+                return self.loss(x, labels)
+
+        net = Net()
+        x = torch.randn(4, 3, 32, 32)
+        labels = torch.ones(4, dtype=torch.int64)
+        inputs = (x, labels)
+
+        ep = export(net, inputs)
+        _export_forward_backward(ep)
+

 if __name__ == "__main__":
     run_tests()
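
A possible follow-on check, continuing from the test's `ep` above (illustrative, not in the commit): inspect the joint program for the convolution backward op whose meta kernel this commit touches. `graph_module` is the standard ExportedProgram accessor; whether `aten.convolution_backward` appears verbatim depends on which decompositions are in effect.

joint_ep = _export_forward_backward(ep)
joint_targets = {
    n.target
    for n in joint_ep.graph_module.graph.nodes
    if n.op == "call_function"
}
# The op this commit's meta-kernel change unblocks:
print(torch.ops.aten.convolution_backward.default in joint_targets)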

torch/_functorch/aot_autograd.py
Lines changed: 2 additions & 1 deletion

@@ -1295,7 +1295,8 @@ def flattened_joint(*args):
             assert grad is None
             return *fw_outs, *output_gradients

-        fx_g = make_fx(flattened_joint)(*full_args)
+        flattened_joint._orig_mod = fx_g
+        fx_g = make_fx(flattened_joint, record_module_stack=True)(*full_args)

     user_args_flat = pytree.arg_tree_leaves(*args, **kwargs)
     return fx_g, create_graph_signature(
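
For readers unfamiliar with the flag: `record_module_stack=True` asks `make_fx` to attach `nn_module_stack` metadata to traced nodes, and pointing `flattened_joint._orig_mod` at the previously traced graph module gives the tracer something to attribute those nodes to. A minimal sketch of the metadata (illustrative; `M` is a hypothetical module, and the behavior is assumed from the flag's name and this diff):

import torch
from torch.fx.experimental.proxy_tensor import make_fx

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = torch.nn.Linear(4, 4)

    def forward(self, x):
        return self.lin(x).relu()

gm = make_fx(M(), record_module_stack=True)(torch.randn(2, 4))
for node in gm.graph.nodes:
    # each ATen node should carry the submodule it was traced from
    print(node.op, node.target, node.meta.get("nn_module_stack"))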

torch/_meta_registrations.py
Lines changed: 3 additions & 10 deletions

@@ -3110,16 +3110,9 @@ def meta_convolution_backward(
 ):
     # High level logic taken from slow_conv3d_backward_cpu which should
     # be representative of all convolution_backward impls
-    backend_grad_input = None
-    backend_grad_weight = None
-    backend_grad_bias = None
-
-    if output_mask[0]:
-        backend_grad_input = grad_output_.new_empty(input_.size())
-    if output_mask[1]:
-        backend_grad_weight = grad_output_.new_empty(weight_.size())
-    if output_mask[2]:
-        backend_grad_bias = grad_output_.new_empty(bias_sizes_opt)
+    backend_grad_input = grad_output_.new_empty(input_.size())
+    backend_grad_weight = grad_output_.new_empty(weight_.size())
+    backend_grad_bias = grad_output_.new_empty(bias_sizes_opt)

     return (backend_grad_input, backend_grad_weight, backend_grad_bias)
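
The practical effect, sketched on the meta device (illustrative; the shapes follow the test's conv1 layer, and the positional arguments mirror the op schema: grad_output, input, weight, bias_sizes, stride, padding, dilation, transposed, output_padding, groups, output_mask): after this change all three gradients are materialized even when output_mask disables one, where the old registration returned None for masked entries.

import torch

grad_out = torch.empty(4, 6, 28, 28, device="meta")  # grad w.r.t. conv1 output
x = torch.empty(4, 3, 32, 32, device="meta")
w = torch.empty(6, 3, 5, 5, device="meta")

gi, gw, gb = torch.ops.aten.convolution_backward(
    grad_out, x, w, [6],
    [1, 1], [0, 0], [1, 1],   # stride, padding, dilation
    False, [0, 0], 1,         # transposed, output_padding, groups
    [True, True, False],      # output_mask: bias grad switched off
)
print(gb)  # with this commit: an empty meta tensor of shape [6], not None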
