githubsgi
diff --git a/.lintrunner.toml b/.lintrunner.toml
Lines changed: 0 additions & 3 deletions
diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py
Lines changed: 1 addition & 1 deletion
diff --git a/test/test_fake_tensor.py b/test/test_fake_tensor.py
Lines changed: 75 additions & 43 deletions
@@ -1193,8 +1193,6 @@ exclude_patterns = [
     'test/quantization/fx/test_numeric_suite_fx.py',
     'test/quantization/fx/test_quantize_fx.py',
     'test/quantization/fx/test_subgraph_rewriter.py',
-    'test/test_fake_tensor.py',
-    'test/test_flop_counter.py',
     'test/test_function_schema.py',
     'test/test_functional_autograd_benchmark.py',
     'test/test_functional_optim.py',
@@ -1330,7 +1328,6 @@ exclude_patterns = [
     'torch/_export/serde/serialize.py',
     'torch/_export/serde/upgrade.py',
     'torch/_export/trace.py',
-    'torch/_export/verifier.py',
     'torch/testing/_internal/__init__.py',
     'torch/testing/_internal/autocast_test_lists.py',
     'torch/testing/_internal/autograd_function_db.py',
 
@@ -3731,7 +3731,7 @@ def test_allgather_base(self):
     @parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2])
     def test_allgather_float8(self, float8_dtype):
         device = torch.device(f"cuda:{self.rank:d}")
-        if not sm_is_or_higher_than(device, 9, 0):
+        if not sm_is_or_higher_than(device, 9, 0):  # noqa: F821
             self.skipTest("FP8 reduction support begins with sm90 capable devices")
         store = dist.FileStore(self.file_name, self.world_size)
         dist.init_process_group(
 
@@ -5,23 +5,23 @@
 import contextlib
 import copy
 import dataclasses
+import gc
 import inspect
+import io
 import itertools
 import pickle
 import unittest
 import weakref
 from unittest.mock import patch
-import io
-import gc
 
 import numpy as np
+
 import torch
 import torch._dynamo
 import torch._functorch.config
 import torch._prims as prims
 import torch.testing._internal.optests as optests
 import torch.utils._pytree as pytree
-
 from torch import distributed as dist
 from torch._C._functorch import _add_batch_dim, get_unwrapped, is_batchedtensor
 from torch._dispatch.python import enable_python_dispatcher
@@ -32,10 +32,10 @@
     _CacheKeyState,
     DynamicOutputShapeException,
     extract_tensor_metadata,
-    MetadataMismatchError,
     FakeTensor,
     FakeTensorConverter,
     FakeTensorMode,
+    MetadataMismatchError,
     unset_fake_temporarily,
     UnsupportedOperatorException,
 )
@@ -56,6 +56,7 @@
     OpDTypes,
     ops,
 )
+from torch.testing._internal.common_dtype import all_types_complex_float8_and
 from torch.testing._internal.common_utils import (
     instantiate_parametrized_tests,
     parametrize,
@@ -68,15 +69,14 @@
     TestCase,
     xfailIfTorchDynamo,
 )
-from torch.testing._internal.common_dtype import all_types_complex_float8_and
 from torch.testing._internal.custom_op_db import custom_op_db
-
 from torch.testing._internal.inductor_utils import GPU_TYPE
 from torch.testing._internal.jit_utils import RUN_CUDA
 from torch.testing._internal.two_tensor import TwoTensor
 from torch.utils._mode_utils import no_dispatch
 from torch.utils._python_dispatch import TorchDispatchMode
 
+
 aten = torch.ops.aten
 
 torch._dynamo.config.fake_tensor_cache_enabled = True
@@ -977,10 +977,12 @@ def test_fast_div(self):
         with mode:
             x = torch.empty(2, 2, device="cpu", dtype=torch.int32)
         from torch._subclasses.fake_impls import get_fast_op_impls
+
         fast_div = get_fast_op_impls()[torch.ops.aten.div.Tensor]
         y = fast_div(mode, x, 2)
         self.assertEqual(y.dtype, torch.float32)
 
+
 instantiate_parametrized_tests(FakeTensorTest)
 
 
@@ -1115,7 +1117,9 @@ def test_fake(self, device, dtype, op):
 make_propagate_real_tensors_cls(FakeTensorOpInfoTest)
 instantiate_device_type_tests(FakeTensorOpInfoTest, globals(), only_for=("cpu", "cuda"))
 instantiate_device_type_tests(
-    PropagateRealTensorsFakeTensorOpInfoTest, globals(), only_for=("cpu",)  # noqa: F821
+    PropagateRealTensorsFakeTensorOpInfoTest,  # noqa: F821
+    globals(),
+    only_for=("cpu",),
 )
 
 
@@ -1415,13 +1419,11 @@ def forward(self, arg1, arg2, arg3):
                 self.assertTrue("output[0]" not in str(e))
                 if self.__class__.__name__.startswith("PropagateRealTensors"):
                     self.assertTrue(
-                        "Real tensor propagation found a metadata mismatch"
-                        in str(e)
+                        "Real tensor propagation found a metadata mismatch" in str(e)
                     )
                 else:
                     self.assertTrue(
-                        "found mismatched tensor metadata for output"
-                        in str(e)
+                        "found mismatched tensor metadata for output" in str(e)
                     )
 
     # IMPORTANT!!! Always run even if CUDA is not available
@@ -1623,61 +1625,74 @@ def test_nonzero_stride(self):
     def test_torch_load_with_fake_mode(self):
         model = torch.nn.Linear(5, 10)
         sd = model.state_dict()
-        sd['tt'] = TwoTensor(torch.randn(2), torch.randn(2))
+        sd["tt"] = TwoTensor(torch.randn(2), torch.randn(2))
 
         def _read_tensor_and_check(key, sd_loaded, all_bytes, device):
             dtype = torch.float32
             t = sd_loaded[key]
             self.assertEqual(t.device.type, device)
             if isinstance(t, TwoTensor):
-                untyped_storage_a, untyped_storage_b = t.a.untyped_storage(), t.b.untyped_storage()
-                offset_a, offset_b = untyped_storage_a._checkpoint_offset, untyped_storage_b._checkpoint_offset
-                nbytes_a, nbytes_b = untyped_storage_a.nbytes() // 4, untyped_storage_b.nbytes() // 4
-                result_a = torch.frombuffer(all_bytes, dtype=dtype, count=nbytes_a, offset=offset_a).resize_(t.a.size())
-                result_b = torch.frombuffer(all_bytes, dtype=dtype, count=nbytes_b, offset=offset_b).resize_(t.b.size())
+                untyped_storage_a, untyped_storage_b = (
+                    t.a.untyped_storage(),
+                    t.b.untyped_storage(),
+                )
+                offset_a, offset_b = (
+                    untyped_storage_a._checkpoint_offset,
+                    untyped_storage_b._checkpoint_offset,
+                )
+                nbytes_a, nbytes_b = (
+                    untyped_storage_a.nbytes() // 4,
+                    untyped_storage_b.nbytes() // 4,
+                )
+                result_a = torch.frombuffer(
+                    all_bytes, dtype=dtype, count=nbytes_a, offset=offset_a
+                ).resize_(t.a.size())
+                result_b = torch.frombuffer(
+                    all_bytes, dtype=dtype, count=nbytes_b, offset=offset_b
+                ).resize_(t.b.size())
                 self.assertEqual(TwoTensor(result_a, result_b), sd[key])
             else:
                 untyped_storage = t.untyped_storage()
                 offset = untyped_storage._checkpoint_offset
                 nbytes = untyped_storage.nbytes() // 4
-                result = torch.frombuffer(all_bytes, dtype=dtype, count=nbytes, offset=offset).resize_(t.size())
+                result = torch.frombuffer(
+                    all_bytes, dtype=dtype, count=nbytes, offset=offset
+                ).resize_(t.size())
                 self.assertEqual(result, sd[key])
 
-
         with TemporaryFileName() as f, torch.serialization.safe_globals([TwoTensor]):
             # Create state_dict to be loaded later
             torch.save(sd, f)
-            with open(f, 'rb') as g:
+            with open(f, "rb") as g:
                 all_bytes = g.read()
 
             fake_mode = FakeTensorMode()
             with fake_mode:
                 sd_loaded = torch.load(f)
             for k in sd:
-                _read_tensor_and_check(k, sd_loaded, all_bytes, 'cpu')
+                _read_tensor_and_check(k, sd_loaded, all_bytes, "cpu")
             with fake_mode:
                 sd_loaded = torch.load(f, map_location="cuda")
             for k in sd:
-                _read_tensor_and_check(k, sd_loaded, all_bytes, 'cuda')
-
+                _read_tensor_and_check(k, sd_loaded, all_bytes, "cuda")
 
         for k in sd.keys():
-            sd[k] = sd[k].to('cuda')
+            sd[k] = sd[k].to("cuda")
 
         with TemporaryFileName() as f, torch.serialization.safe_globals([TwoTensor]):
             torch.save(sd, f)
-            with open(f, 'rb') as g:
+            with open(f, "rb") as g:
                 all_bytes = g.read()
 
             fake_mode = FakeTensorMode()
             with fake_mode:
                 sd_loaded = torch.load(f)
             for k in sd:
-                _read_tensor_and_check(k, sd_loaded, all_bytes, 'cuda')
+                _read_tensor_and_check(k, sd_loaded, all_bytes, "cuda")
             with fake_mode:
                 sd_loaded = torch.load(f, map_location="cpu")
             for k in sd:
-                _read_tensor_and_check(k, sd_loaded, all_bytes, 'cpu')
+                _read_tensor_and_check(k, sd_loaded, all_bytes, "cpu")
 
 
 make_propagate_real_tensors_cls(FakeTensorPropTest)
@@ -1994,9 +2009,9 @@ def test_fft_hfft2_issue145522(self):
             x = torch.randn(s0, s1, s2)
             out = torch.randn(s0, s3, s4)
             kwargs = {
-                's': (s3, s4),
-                'dim': (1, s5),
-                'norm': 'ortho',
+                "s": (s3, s4),
+                "dim": (1, s5),
+                "norm": "ortho",
             }
             r = torch._C._fft.fft_hfft2(x, **kwargs, out=out)
             self.assertEqual(r.shape, out.shape)
@@ -2074,8 +2089,12 @@ def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
             def __torch_dispatch__(cls, func, types, args, kwargs):
                 if kwargs is None:
                     kwargs = {}
-                args = pytree.tree_map_only(DifferentDeviceTensor, lambda x: x.inner_tensor, args)
-                kwargs = pytree.tree_map_only(DifferentDeviceTensor, lambda x: x.inner_tensor, kwargs)
+                args = pytree.tree_map_only(
+                    DifferentDeviceTensor, lambda x: x.inner_tensor, args
+                )
+                kwargs = pytree.tree_map_only(
+                    DifferentDeviceTensor, lambda x: x.inner_tensor, kwargs
+                )
                 # Returns unwrapped tensor
                 return func(*args, **kwargs)
 
@@ -2098,7 +2117,7 @@ def f(x):
             return torch.nn.functional.interpolate(
                 x,
                 size=[256, 256],
-                mode='bilinear',
+                mode="bilinear",
                 align_corners=False,
                 antialias=True,
             )
@@ -2108,8 +2127,13 @@ def f(x):
         x = fake_m.from_tensor(
             torch.randn(1, 3, 2005, 1920, requires_grad=True),
             symbolic_context=StatelessSymbolicContext(
-                dynamic_sizes=[DimDynamic.STATIC, DimDynamic.STATIC, DimDynamic.DYNAMIC, DimDynamic.DYNAMIC],
-                constraint_sizes=[None, None, None, None]
+                dynamic_sizes=[
+                    DimDynamic.STATIC,
+                    DimDynamic.STATIC,
+                    DimDynamic.DYNAMIC,
+                    DimDynamic.DYNAMIC,
+                ],
+                constraint_sizes=[None, None, None, None],
             ),
         )
         with fake_m, enable_python_dispatcher():
@@ -2126,14 +2150,14 @@ def test_from_buffer(self):
 
             t = torch.ByteTensor(storage)
             self.assertTrue(isinstance(t, FakeTensor))
-            self.assertEqual(t.device, torch.device('cpu'))
+            self.assertEqual(t.device, torch.device("cpu"))
 
     def test_meta_tensor_to_fake_cpu(self):
-        x = torch.randn(4, 4, device='meta')
+        x = torch.randn(4, 4, device="meta")
         with FakeTensorMode(allow_non_fake_inputs=True):
-            x_cpu = x.to(device='cpu')
+            x_cpu = x.to(device="cpu")
         self.assertTrue(isinstance(x_cpu, FakeTensor))
-        self.assertEqual(x_cpu.device, torch.device('cpu'))
+        self.assertEqual(x_cpu.device, torch.device("cpu"))
 
     def test_cache_tuple_outputs(self):
         """
@@ -2158,7 +2182,6 @@ def test_cache_tuple_outputs(self):
                     extract_tensor_metadata(b),
                 )
 
-
     def test_cache_aten_index(self):
         with FakeTensorMode():
             x = torch.randn(4, 4, 4)
@@ -2178,10 +2201,16 @@ def test_cache_aten_index(self):
         with FakeTensorMode():
             x = torch.randn(4, 4, 4)
             idx_tensor1 = torch.tensor([True, True, False, True])
-            self.assertRaises(DynamicOutputShapeException, lambda: torch.ops.aten.index(x, [None, idx_tensor1]))
+            self.assertRaises(
+                DynamicOutputShapeException,
+                lambda: torch.ops.aten.index(x, [None, idx_tensor1]),
+            )
 
             idx_tensor1 = torch.tensor([1, -2, 3, -4], dtype=torch.int8)
-            self.assertRaises(DynamicOutputShapeException, lambda: torch.ops.aten.index(x, [None, idx_tensor1]))
+            self.assertRaises(
+                DynamicOutputShapeException,
+                lambda: torch.ops.aten.index(x, [None, idx_tensor1]),
+            )
 
     @skipIfTorchDynamo("cache hit/miss changes with invoke_subgraph caching")
     def test_invoke_subgraph(self):
@@ -2335,11 +2364,14 @@ def forward(
         lengths = torch.tensor([0, 2, 3, 1, 4])
         indices = torch.tensor([2, 3, 4, 6, 7, 8, 9])
         offsets = torch.cumsum(lengths, 0)
-        ep = torch.export.export(LengthsGather(), (input, lengths, indices, offsets), strict=False)
+        ep = torch.export.export(
+            LengthsGather(), (input, lengths, indices, offsets), strict=False
+        )
 
         FakeTensorMode.cache_clear()
         ep.run_decompositions({})
         self.assertBypasses("unrepresented symbol in output", 2)
 
+
 if __name__ == "__main__":
     run_tests()