
Commit d90717e

eellison authored and pytorchmergebot committed
Add option to save real tensors in TORCH_COMPILE_DEBUG repro (pytorch#138110)
This PR adds a utility that tries to construct the corresponding real tensor values of fake tensors by checking whether their meta storage is contained in the meta converter. We can then save real tensor values for fx_graph_runnable when `TORCH_COMPILE_DEBUG_SAVE_REAL=1` is set.

Differential Revision: [D64502744](https://our.internmc.facebook.com/intern/diff/D64502744)

Pull Request resolved: pytorch#138110
Approved by: https://github.com/ezyang
1 parent 2922b9f commit d90717e
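For orientation, a minimal way to exercise the new flag end to end; the script body is illustrative and not part of this commit:

# Run with: TORCH_COMPILE_DEBUG=1 TORCH_COMPILE_DEBUG_SAVE_REAL=1 python demo.py
import torch

def f(x):
    return (x.sin() + x.cos()).sum()

torch.compile(f)(torch.randn(16))
# With both variables set, the emitted fx_graph_runnable.py loads the real
# tensors observed during compilation instead of synthesizing random inputs.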

File tree

5 files changed: +110 lines, -4 lines

test/test_fake_tensor.py

Lines changed: 10 additions & 0 deletions

@@ -179,6 +179,16 @@ def test_repr(self):
         x = torch.empty(2, 2, device="meta")
         self.assertEqual(repr(x), "FakeTensor(..., device='meta', size=(2, 2))")

+    def test_convert_fake_to_real(self):
+        x = torch.ones([20])
+        with FakeTensorMode(allow_non_fake_inputs=True) as m:
+            _ = x + 1
+
+        out = torch._subclasses.fake_utils.try_convert_fake_to_real([x[0:10]])
+
+        self.assertEqual(torch.ones([10]), out[0])
+
+
     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_zero_dim(self):
         with FakeTensorMode() as mode:
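Outside the test suite, the same machinery can be sketched like this; the flow is illustrative (it leans on the meta converter memos shown in the fake_utils.py hunk below) and the exact behavior depends on FakeTensorMode internals:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode
from torch._subclasses.fake_utils import try_convert_fake_to_real

x = torch.arange(4.0)
with FakeTensorMode(allow_non_fake_inputs=True):
    fake_view = x[0:2]  # dispatching under the mode yields a FakeTensor view of x

# The fake view's meta storage was memoized against x's real storage, so the
# utility can rebuild a real tensor carrying x's actual values.
(real,) = try_convert_fake_to_real([fake_view])
print(real)  # expected: tensor([0., 1.])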

torch/_dynamo/repro/after_aot.py

Lines changed: 7 additions & 2 deletions

@@ -225,7 +225,9 @@ def inner_debug_fn(real_inputs):
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #


-def generate_compiler_repro_string(gm, args, *, stable_output=False, save_dir=None):
+def generate_compiler_repro_string(
+    gm, args, *, stable_output=False, save_dir=None, stable_hash=False
+):
     model_str = textwrap.dedent(
         f"""
         import torch
@@ -257,7 +259,7 @@ def generate_compiler_repro_string(gm, args, *, stable_output=False, save_dir=No
     def hint_if_symint(x):
         return tuple(i.node.hint if isinstance(i, torch.SymInt) else i for i in x)

-    writer = InputWriter(save_dir)
+    writer = InputWriter(save_dir, stable_hash=stable_hash)
     for placeholder, arg in zip(fx_placeholder_targets(gm), args):
         if isinstance(arg, (int, torch.SymInt)):
             writer.symint(placeholder, arg)
@@ -287,6 +289,7 @@ def save_graph_repro(
     accuracy=None,
     tracing_mode=None,
     check_str=None,
+    stable_hash=False,
 ):
     if any(
         isinstance(arg, torch.fx.experimental._backward_state.BackwardState)
@@ -296,12 +299,14 @@ def save_graph_repro(
             "Repro is not generated due to existence of BackwardState in graph input"
         )
         return
+
     fd.write(
         generate_compiler_repro_string(
             gm,
             args,
             stable_output=stable_output,
             save_dir=save_dir,
+            stable_hash=stable_hash,
         )
     )
     if accuracy is None:
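A hedged sketch of how a caller might pass the new keyword; gm and args are placeholders for a traced torch.fx.GraphModule and its inputs, and the paths are illustrative:

from torch._dynamo.repro.after_aot import save_graph_repro

with open("/tmp/fx_graph_runnable.py", "w") as fd:
    save_graph_repro(
        fd,
        gm,                # placeholder: traced GraphModule
        args,              # placeholder: graph inputs
        "inductor",
        save_dir="/tmp",   # InputWriter serializes tensor inputs here
        stable_hash=True,  # use stable content hashing when serializing inputs
    )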

torch/_inductor/config.py

Lines changed: 3 additions & 0 deletions

@@ -1224,6 +1224,9 @@ class trace:
     # master switch for all debugging flags below
     enabled = os.environ.get("TORCH_COMPILE_DEBUG", "0") == "1"

+    # save real tensors
+    save_real_tensors = os.environ.get("TORCH_COMPILE_DEBUG_SAVE_REAL", "0") == "1"
+
     # Save debug information to a temporary directory
     # If not specified, a temp directory will be created by system
     debug_dir: Optional[str] = None
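Besides the environment variable, the same flag can be flipped in-process with the inductor config patcher (the pattern the diff itself uses); a brief sketch:

import torch

# Scoped equivalent of TORCH_COMPILE_DEBUG=1 TORCH_COMPILE_DEBUG_SAVE_REAL=1.
with torch._inductor.config.patch(
    {"trace.enabled": True, "trace.save_real_tensors": True}
):
    torch.compile(lambda t: t * 2)(torch.ones(4))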

torch/_inductor/debug.py

Lines changed: 20 additions & 1 deletion

@@ -473,7 +473,26 @@ def fx_graph(
         inputs: List[torch.Tensor],
     ) -> None:
         with self.fopen("fx_graph_runnable.py") as fd:
-            save_graph_repro(fd, gm, inputs, "inductor")
+            save_dir = None
+            if torch._inductor.config.trace.save_real_tensors:
+                inputs = torch._subclasses.fake_utils.try_convert_fake_to_real(inputs)
+                save_dir = os.path.dirname(fd.name)
+
+            # don't use a stable hash for the torchinductor compilation unless
+            # saving real tensors, and avoid recursively trying to save real
+            # tensors inside of the inductor compilation regardless
+            stable_hash = torch._inductor.config.trace.save_real_tensors
+            with torch._inductor.config.patch(
+                {"trace.enabled": False, "trace.save_real_tensors": False}
+            ):
+                save_graph_repro(
+                    fd,
+                    gm,
+                    inputs,
+                    "inductor",
+                    save_dir=save_dir,
+                    stable_hash=stable_hash,
+                )

         with self.fopen("fx_graph_readable.py") as fd:
             fd.write(gm.print_readable(print_output=False))
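Note that the conversion above is best-effort: any input whose real storage cannot be found, or whose layout or symbolic sizes rule it out, is left as a FakeTensor in the returned list. An illustrative check, with inputs standing in for the graph input list:

from torch._subclasses.fake_tensor import FakeTensor
from torch._subclasses.fake_utils import try_convert_fake_to_real

converted = try_convert_fake_to_real(inputs)
still_fake = [i for i, t in enumerate(converted) if isinstance(t, FakeTensor)]
print(f"inputs not recovered as real tensors: {still_fake}")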

torch/_subclasses/fake_utils.py

Lines changed: 70 additions & 1 deletion

@@ -2,12 +2,13 @@

 import functools
 import warnings
-from typing import Callable, Union
+from typing import Any, Callable, List, Union

 import torch
 import torch.utils._pytree as pytree
 from torch._ops import OpOverload
 from torch._subclasses.fake_tensor import (
+    FakeTensor,
     FakeTensorMode,
     tree_flatten_only,
     UnsupportedFakeTensorException,
@@ -75,6 +76,74 @@ def is_sdpa_error(func, idx, e):
     return False


+def try_convert_fake_to_real(
+    ten_list: List[Union[FakeTensor, Any]]
+) -> List[Union[FakeTensor, torch.Tensor, Any]]:
+    """
+    Attempt to convert fake tensors to corresponding real tensors with the
+    correct underlying storage by looking up the FakeTensorMode meta-to-real
+    storage mapping. On failure to find the storage mapping, the FakeTensor
+    will remain in the list.
+
+    Note: this is not currently optimized (it makes copies of the meta
+    converter's internal dictionaries)
+    """
+
+    fake_tensor = next(
+        (item for item in ten_list if isinstance(item, FakeTensor)), None
+    )
+    if fake_tensor is None:
+        return ten_list
+
+    fake_mode = fake_tensor.fake_mode
+    meta_converter = fake_mode.fake_tensor_converter.meta_converter
+    desc = meta_converter.describer
+
+    storage_to_key = {v: k for k, v in meta_converter.storage_memo.items()}
+    key_to_real_storage = {v: k for k, v in desc.lookup_storage.items()}
+    out = []
+    for t in ten_list:
+        if not isinstance(t, FakeTensor) or not t.layout == torch.strided:
+            out.append(t)
+            continue
+
+        key = storage_to_key.get(t.untyped_storage())
+        real_storage = None if key is None else key_to_real_storage.get(key)
+        if real_storage is None:
+            out.append(t)
+            continue
+
+        unhinted = False
+
+        def map_symint(s):
+            nonlocal unhinted
+            if not isinstance(s, torch.SymInt):
+                return s
+            unhinted = unhinted or not s.node.has_hint()
+            return s.node.hint
+
+        stor_offset = map_symint(t.storage_offset())
+        size = [map_symint(s) for s in t.shape]
+        stride = [map_symint(s) for s in t.stride()]
+
+        if unhinted:
+            out.append(t)
+            continue
+
+        new_tensor = torch.empty(
+            [],
+            dtype=t.dtype,
+            device=t.device,
+        )
+        new_tensor.set_(
+            real_storage,
+            storage_offset=stor_offset,
+            size=size,
+            stride=stride,
+        )
+        out.append(new_tensor.clone())
+
+    return out
+
+
 class CrossRefFakeMode(TorchDispatchMode):
     def __init__(
         self,
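The tail of try_convert_fake_to_real rebuilds a tensor from raw storage plus (offset, size, stride) and then clones it so the result owns its memory. The same pattern on ordinary tensors, for intuition (self-contained, not from the commit):

import torch

base = torch.arange(12.0)                # float32 values 0..11
storage = base.untyped_storage()

# Reconstruct a view equivalent to base[2:8:2] directly from the storage.
t = torch.empty([], dtype=torch.float32)
t.set_(storage, storage_offset=2, size=(3,), stride=(2,))
print(t)           # tensor([2., 4., 6.])

owned = t.clone()  # detach from the shared storage, as the utility does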
