Use all frames of the stack trace when importing (#4075)
We currently use only the first frame of a node's `stack_trace` when importing the
node into MLIR. This causes modules with deeply nested calls to lose most of the
useful location information. This change recovers all of the stack frames, at the
expected cost of a larger MLIR module. This also seems to be how we originally
imported locations from TorchScript.
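As a rough illustration of what "using all frames" means, the sketch below folds a list of stack frames into a single chained `callsite` location via the MLIR Python bindings (`mlir.ir` upstream; torch-mlir vendors them as `torch_mlir.ir`). This is a hypothetical reconstruction, not the importer's actual code; `location_from_frames` and its frame ordering are assumptions:
```python
from mlir.ir import Context, Location


def location_from_frames(frames, ctx):
    """Fold (filename, line) frames, ordered innermost-first, into one chained
    location: callsite(innermost at callsite(... at outermost))."""
    with ctx:
        if not frames:
            return Location.unknown()
        locs = [Location.file(f, line, 0) for f, line in frames]
        loc = locs[-1]  # Start from the outermost frame (e.g. compile.py).
        for frame in reversed(locs[:-1]):
            # Each step makes the next-inner frame the callee of the chain
            # built so far.
            loc = Location.callsite(frame, [loc])
        return loc


# Frames for the innermost add in the example below, innermost first:
frames = [
    ("/tmp/model.py", 17),
    ("/tmp/model.py", 20),
    ("/tmp/model.py", 22),
    ("torch/nn/modules/module.py", 1562),
    ("compile.py", 1333),
]
print(location_from_frames(frames, Context()))
```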
For an example module like this (in `/tmp/model.py`):
```python
from typing import Dict

import torch


def add_fp32_loader() -> RCPayload:
    class AddFP32Net(torch.nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, inputs: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
            def bar(x):
                return x + 1.0

            def foo(x1, x2):
                return bar(x1) + bar(x2)

            z1 = foo(inputs["x"], inputs["y"])
            return {"z1": z1}
```
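For reference, a minimal driver for such an import might look like the following. This is a sketch assuming a recent torch-mlir with the FX importer; the `compile.py` harness referenced in the locations below is internal, and the exact entry point and arguments may differ:
```python
import torch
from torch_mlir import fx

# Assumes AddFP32Net is visible at module scope (unlike the loader above,
# which defines it locally).
net = AddFP32Net()
example_inputs = {"x": torch.randn(128, 128), "y": torch.randn(128, 128)}
m = fx.export_and_import(net, example_inputs)
# Locations are only shown when printing with debug info enabled.
print(m.operation.get_asm(enable_debug_info=True))
```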
If we import this, we now get:
```mlir
#loc1 = loc("compile.py":1332:0)
module {
  func.func @add_fp32(%arg0: !torch.vtensor<[128,128],f32> loc("compile.py":1332:0), %arg1: !torch.vtensor<[128,128],f32> loc("compile.py":1332:0)) -> !torch.vtensor<[128,128],f32> attributes {torch.assume_strict_symbolic_shapes} {
    %none = torch.constant.none loc(#loc1)
    %0 = torch.aten.clone %arg0, %none : !torch.vtensor<[128,128],f32>, !torch.none -> !torch.vtensor<[128,128],f32> loc(#loc1)
    %none_0 = torch.constant.none loc(#loc1)
    %1 = torch.aten.clone %arg1, %none_0 : !torch.vtensor<[128,128],f32>, !torch.none -> !torch.vtensor<[128,128],f32> loc(#loc1)
    %float1.000000e00 = torch.constant.float 1.000000e+00 loc(#loc10)
    %int1 = torch.constant.int 1 loc(#loc10)
    %2 = torch.aten.add.Scalar %0, %float1.000000e00, %int1 : !torch.vtensor<[128,128],f32>, !torch.float, !torch.int -> !torch.vtensor<[128,128],f32> loc(#loc10)
    %float1.000000e00_1 = torch.constant.float 1.000000e+00 loc(#loc10)
    %int1_2 = torch.constant.int 1 loc(#loc10)
    %3 = torch.aten.add.Scalar %1, %float1.000000e00_1, %int1_2 : !torch.vtensor<[128,128],f32>, !torch.float, !torch.int -> !torch.vtensor<[128,128],f32> loc(#loc10)
    %int1_3 = torch.constant.int 1 loc(#loc9)
    %4 = torch.aten.add.Tensor %2, %3, %int1_3 : !torch.vtensor<[128,128],f32>, !torch.vtensor<[128,128],f32>, !torch.int -> !torch.vtensor<[128,128],f32> loc(#loc9)
    return %4 : !torch.vtensor<[128,128],f32> loc(#loc1)
  } loc(#loc1)
} loc(#loc)
#loc = loc(unknown)
#loc2 = loc("/tmp/model.py":17:0)
#loc3 = loc("/tmp/model.py":20:0)
#loc4 = loc("/tmp/model.py":22:0)
#loc5 = loc("torch/nn/modules/module.py":1562:0)
#loc6 = loc("compile.py":1333:0)
#loc7 = loc(callsite(#loc5 at #loc6))
#loc8 = loc(callsite(#loc4 at #loc7))
#loc9 = loc(callsite(#loc3 at #loc8))
#loc10 = loc(callsite(#loc2 at #loc9))
```
Originally, all ops would have a single location pointing to `compile.py`
(the frame from which we initiated the import). Inlining the chain of
locations for the final `aten.add.Tensor` op (`#loc9`) gives us:
```
#loc9:
"/tmp/model.py":20:0
"/tmp/model.py":22:0
"torch/nn/modules/module.py":1562:0
"compile.py":1333:0
```
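The frames themselves come from the FX node's `stack_trace` attribute, which stores traceback-style text. Below is a sketch of the kind of parsing involved; `parse_stack_trace` and the exact regex are assumptions, not the importer's actual code:
```python
import re

# Traceback-style frames, outermost call first, e.g.:
#   File "/tmp/model.py", line 22, in forward
#     z1 = foo(inputs["x"], inputs["y"])
_FRAME_RE = re.compile(r'File "(?P<file>[^"]+)", line (?P<line>\d+), in \S+')


def parse_stack_trace(stack_trace):
    """Return (filename, line) frames, innermost first, ready to be folded
    into a callsite chain as in the earlier sketch."""
    frames = [(m["file"], int(m["line"]))
              for m in _FRAME_RE.finditer(stack_trace or "")]
    return list(reversed(frames))
```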
---------
Co-authored-by: Srinath Avadhanula <[email protected]>