
Commit 9e089bb

laithsakka authored and pytorchmergebot committed
change guard_or impl for better perf and simplicity (pytorch#153674)
PR time benchmarks have been showing regressions as we move to guard_or_false; the reason is that the previous implementation did not cache. The new approach propagates the fallback value down to eval and returns it, allowing eval to cache and reducing log spam and complexity. Pull Request resolved: pytorch#153674 Approved by: https://github.com/bobrenjc93
1 parent 4b7abce commit 9e089bb
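
The gist of the change, simplified from the symbolic_shapes.py diff below. guard_bool, evaluate_sym_node, and GuardOnDataDependentSymNode are the real names from that file, but these bodies are abbreviated sketches rather than the verbatim implementation:

# Before: a data-dependent expression raised, the exception was caught,
# and the full evaluation re-ran on every repeated call, since exceptions
# bypass the lru_cache on the evaluation path.
def _guard_or_old(a, default):
    try:
        return guard_bool(a)
    except GuardOnDataDependentSymNode:
        return default

# After: the fallback is threaded into evaluation and comes back as an
# ordinary return value, which the cache can memoize.
def _guard_or_new(a, default):
    sym_node = a.node
    r = sym_node.shape_env.evaluate_sym_node(
        sym_node, size_oblivious=False, fallback_value=default
    )
    return bool(r)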

File tree

3 files changed: +60 -76 lines

benchmarks/dynamo/pr_time_benchmarks/expected_results.csv

Lines changed: 10 additions & 26 deletions
@@ -1,16 +1,16 @@
-add_loop_eager,compile_time_instruction_count,2960000000,0.015
+add_loop_eager,compile_time_instruction_count,2953000000,0.015



-add_loop_eager_dynamic,compile_time_instruction_count,5827000000,0.025
+add_loop_eager_dynamic,compile_time_instruction_count,5808000000,0.025



 add_loop_inductor,compile_time_instruction_count,29370000000,0.015



-add_loop_inductor_dynamic_gpu,compile_time_instruction_count,44080000000,0.025
+add_loop_inductor_dynamic_gpu,compile_time_instruction_count,44010000000,0.025



@@ -22,43 +22,27 @@ basic_modules_ListOfLinears_eager,compile_time_instruction_count,939900000,0.015



-basic_modules_ListOfLinears_inductor,compile_time_instruction_count,18240000000,0.015
+basic_modules_ListOfLinears_inductor,compile_time_instruction_count,18140000000,0.015



-basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,16340000000,0.015
+basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,16220000000,0.015



 basic_modules_ListOfLinears_inductor_gpu,compile_time_instruction_count,10370000000,0.2



-basic_InlineMod_eager,compile_time_instruction_count,7101000000,0.015
+update_hint_regression,compile_time_instruction_count,1681000000,0.02



-update_hint_regression,compile_time_instruction_count,1683000000,0.02
+float_args,compile_time_instruction_count,449800000,0.015



-float_args,compile_time_instruction_count,455100000,0.015
-
-
-
-mm_loop_inductor_gpu,compile_time_instruction_count,4407000000,0.015
-
-
-
-mm_loop_inductor_dynamic_gpu,compile_time_instruction_count,7381000000,0.015
-
-
-
-basic_NestedModule_eager,compile_time_instruction_count,8241000000,0.015
-
-
-
-sum_floordiv_regression,compile_time_instruction_count,1000000000,0.015
+sum_floordiv_regression,compile_time_instruction_count,998600000,0.015



@@ -78,11 +62,11 @@ aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5981000000,0



-aotdispatcher_partitioner_cpu,compile_time_instruction_count,8630000000,0.015
+aotdispatcher_partitioner_cpu,compile_time_instruction_count,8585000000,0.015



-aotdispatcher_partitioner_cpu2,compile_time_instruction_count,1890000000,0.015
+aotdispatcher_partitioner_cpu2,compile_time_instruction_count,1900000000,0.015


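Each row of expected_results.csv is a benchmark name, a metric, the expected value, and a relative noise tolerance (0.015 means 1.5%). A minimal, hypothetical sketch of how such a row can be consumed; the real comparison logic in the pr_time_benchmarks harness may differ:

import csv

def within_tolerance(measured: float, expected: float, rel_tol: float) -> bool:
    # A benchmark counts as regressed (or as needing a new baseline) only
    # when the measured value drifts more than rel_tol from the record.
    return abs(measured - expected) <= rel_tol * expected

with open("expected_results.csv") as f:
    for row in csv.reader(f):
        if len(row) != 4:  # the file uses blank spacer lines
            continue
        name, metric, expected, tol = row
        measured = float(expected)  # stand-in for a fresh measurement
        print(name, within_tolerance(measured, float(expected), float(tol)))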

test/test_dynamic_shapes.py

Lines changed: 4 additions & 2 deletions
@@ -2851,10 +2851,11 @@ def func(a, b):
             else:
                 return b * 20

-        # call with guarding.
+        # eager.
         self.assertEqual(func(torch.tensor([1]), torch.tensor([1])), torch.tensor([10]))
         self.assertEqual(func(torch.tensor([2]), torch.tensor([1])), torch.tensor([20]))

+        # compile with unbacked.
         unbacked_func = torch.compile(func, dynamic=True, fullgraph=True)
         a = torch.tensor([1])
         b = torch.tensor([1])

@@ -2916,10 +2917,11 @@ def func(a, b):
             else:
                 return b * 20

-        # call with guarding.
+        # eager.
         self.assertEqual(func(torch.tensor([1]), torch.tensor([1])), torch.tensor([10]))
         self.assertEqual(func(torch.tensor([2]), torch.tensor([1])), torch.tensor([20]))

+        # compile with unbacked.
         unbacked_func = torch.compile(func, dynamic=True, fullgraph=True)
         a = torch.tensor([1])
         b = torch.tensor([1])
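
Outside of torch.compile, the fallback behavior these tests exercise can be observed directly on a ShapeEnv. A minimal sketch: create_unbacked_symint produces a hint-less symbol of the kind that .item() or nonzero() introduce under compilation:

from torch.fx.experimental.symbolic_shapes import (
    ShapeEnv,
    guard_or_false,
    guard_or_true,
)

shape_env = ShapeEnv()
u = shape_env.create_unbacked_symint()  # unbacked: no hint available

# u == 1 cannot be decided either way, so rather than raising
# GuardOnDataDependentSymNode, each helper returns its fallback and
# installs no guard and no runtime assertion.
print(guard_or_false(u == 1))  # False
print(guard_or_true(u == 1))   # True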

torch/fx/experimental/symbolic_shapes.py

Lines changed: 46 additions & 48 deletions
@@ -17,7 +17,6 @@
 import abc
 import atexit
 import collections
-import contextlib
 import dis
 import functools
 import hashlib

@@ -1218,17 +1217,6 @@ def compute_unbacked_bindings(
     return symbol_to_path


-def _log_suppressed_dde(a: SymBool, assumed_value: bool) -> None:
-    sloc, extra = a.node.shape_env._get_stack_summary(True)
-    log.info(
-        "could not evaluate %s due to data dependency, it was assumed to be %s with no runtime assertions %s %s",
-        a,
-        assumed_value,
-        sloc,
-        extra,
-    )
-
-
 # The following two functions are common utilities used while defining unbacked semantics
 # of various framework code. Those would be used in situations you prefer to guard and know
 # the result of the expression over not guarding, but in case you hit a data dependent error

@@ -1265,12 +1253,11 @@ def _guard_or(a: BoolLikeType, default: bool) -> bool:
     if shape_env is None:
         return guard_bool(a)

-    with a.node.shape_env.dde_suppressed():
-        try:
-            return guard_bool(a)
-        except GuardOnDataDependentSymNode:
-            _log_suppressed_dde(a, default)
-            return default
+    sym_node = a.node
+    r = sym_node.shape_env.evaluate_sym_node(
+        sym_node, size_oblivious=False, fallback_value=default
+    )
+    return bool(r)


 def guard_or_false(a: BoolLikeType) -> bool:

@@ -3314,10 +3301,6 @@ def __init__(
             else []
         )

-        # Set true when data dependent errors are handled by caller side and not thrown. Ex: guard_or_false
-        # and guard_or_true. When its true, a different error message is produced.
-        self._dde_suppressed = False
-
         # FakeTensor per-ShapeEnv operation cache. This is used for caching
         # operations that contain symbolic shapes which have guards on the
         # ShapeEnv (so are ShapeEnv-dependent).

@@ -3330,18 +3313,6 @@ def __init__(
             torch._subclasses.fake_tensor._DispatchCacheEntry,
         ] = {}

-    @contextlib.contextmanager
-    def dde_suppressed(self) -> Iterator[None]:
-        """Suppressed GuardOnDataDependent error logs"""
-
-        # We do not expect this to be called recursively.
-        assert not self._dde_suppressed, "not expected value for _dde_suppressed"
-        self._dde_suppressed = True
-        try:
-            yield
-        finally:
-            self._dde_suppressed = False
-
     # Pro-tip: if you add new field to ShapeEnv, this affects some accept
     # tests. Accept their output with:
     #

@@ -3643,7 +3614,6 @@ def check_equal(self, other: ShapeEnv) -> None:
         "replacements_slocs",
         "_resimplify_floor_div_axioms",
         "_expr_sym_node_id",
-        "_dde_suppressed",
         "specialization_stacks",
     )

@@ -6152,12 +6122,6 @@ def _make_data_dependent_error(
         size_oblivious_result: Optional[sympy.Basic] = None,
         expr_sym_node_id: Optional[int] = None,
     ) -> GuardOnDataDependentSymNode:
-        if self._dde_suppressed:
-            return GuardOnDataDependentSymNode(
-                expr,
-                "This data dependent error is suppressed and handled by the caller",
-            )
-
         # TODO: in a Dynamo context, having user code, and having the
         # name of the local, will be much better
         size_like_symbols = []

@@ -6846,14 +6810,19 @@ def evaluate_sym_node(
         self,
         sym_node: SymNode,
         size_oblivious: bool = False,
+        fallback_value: Optional[bool] = None,
     ) -> sympy.Basic:
         """
         Given a a SymNode, evaluates sym_node.expr, adding guards if necessary.
         """

         self._expr_sym_node_id = id(sym_node)
         return self.evaluate_expr(
-            sym_node.expr, sym_node.hint, sym_node.fx_node, size_oblivious
+            sym_node.expr,
+            sym_node.hint,
+            sym_node.fx_node,
+            size_oblivious,
+            fallback_value=fallback_value,
         )

     def _is_python_assert(self) -> bool:

@@ -6939,17 +6908,25 @@ def evaluate_expr(
         hint: Optional[Union[int, bool, float]] = None,
         fx_node: Optional[torch.fx.Node] = None,
         size_oblivious: bool = False,
+        fallback_value: Optional[bool] = None,
         *,
         forcing_spec: bool = False,
     ) -> sympy.Basic:
         """
         Given an expression, evaluates it, adding guards if necessary
+        When fallback_value is not None the function return fallback_value instead of failing with data dependent error.
         """

         # Add extra state that evaluate_expr() depends on.
         suppress_guards_tls = ShapeEnv._suppress_guards_tls()
         return self._inner_evaluate_expr(
-            orig_expr, hint, fx_node, size_oblivious, forcing_spec, suppress_guards_tls
+            orig_expr,
+            hint,
+            fx_node,
+            size_oblivious,
+            forcing_spec,
+            suppress_guards_tls,
+            fallback_value,
         )

     @lru_cache(256)

@@ -6962,17 +6939,19 @@ def _inner_evaluate_expr(
         size_oblivious: bool,
         forcing_spec: bool,
         _suppress_guards_tls: bool,
+        fallback_value: Optional[bool] = None,
     ) -> sympy.Basic:
         try:
             return self._evaluate_expr(
                 orig_expr,
                 hint,
                 fx_node,
                 size_oblivious,
+                fallback_value,
                 forcing_spec=forcing_spec,
             )
         except Exception as e:
-            if isinstance(e, GuardOnDataDependentSymNode) and self._dde_suppressed:
+            if isinstance(e, GuardOnDataDependentSymNode):
                 pass
             else:
                 self.log.warning(

@@ -6984,12 +6963,23 @@
                 )
             raise

+    def _log_suppressed_dde(self, a: SymBool, assumed_value: bool) -> None:
+        sloc, extra = self._get_stack_summary(True)
+        log.info(
+            "could not evaluate %s due to data dependency, it was assumed to be %s with no runtime assertions %s %s",
+            a,
+            assumed_value,
+            sloc,
+            extra,
+        )
+
     def _evaluate_expr(
         self,
         orig_expr: sympy.Basic,
         hint: Optional[Union[bool, int, float]] = None,
         fx_node: Optional[torch.fx.Node] = None,
         size_oblivious: bool = False,
+        fallback_value: Optional[bool] = None,
         *,
         forcing_spec: bool = False,
     ) -> sympy.Basic:

@@ -7021,7 +7011,7 @@ def compute_concrete_val() -> sympy.Basic:
         # 3. the guard should not be suppressed
         # 4. the guard doesn't contain backed symfloat symbols
         # since z3 can't handle floats
-        #
+        # 5. fallback_value is none.
         # If all of the above check, we create an FX node representing the
         # actual expression to be guarded.
         node = None

@@ -7032,6 +7022,7 @@ def compute_concrete_val() -> sympy.Basic:
             and not self._suppress_guards_tls()
             and not size_oblivious
             and not any(symbol_is_type(s, SymT.FLOAT) for s in orig_expr.free_symbols)
+            and fallback_value is None
         ):
             # TODO: does this even worked with unbacked :think:
             concrete_val = compute_concrete_val()

@@ -7113,7 +7104,7 @@ def compute_concrete_val() -> sympy.Basic:
         # Those are backed dimentions that are treated as unbacked to avoid specializations, but if
         # we fail to bypass with size oblivious reasoning we compute using the actual hint and guard.
         if (
-            not self._dde_suppressed
+            fallback_value is None  # do not do this under guard_or
             and self.oblivious_var_to_val
             and not (
                 correct_hint := orig_expr.xreplace(

@@ -7143,8 +7134,9 @@ def compute_concrete_val() -> sympy.Basic:
         # unbacked_var_to_val is not None iff propagate_real_tensors is on.
         # if propagate_real_tensors is on, we check the example values to generate (unsound_result)
         # and if they pass we add a runtime assertions and continue.
+
         if (
-            not self._dde_suppressed
+            fallback_value is None  # do not do this under guard_or
             and not ok
             and self.unbacked_var_to_val
             and not (

@@ -7165,10 +7157,16 @@ def compute_concrete_val() -> sympy.Basic:
             transmute_into_runtime_assert = True
             ok = True

+        # fallback value is set when guard_or_true, gaurd_or_false are used.
+        # whe we fail to evaluate soundly, we use the default value set by it.
+        if not ok and fallback_value is not None:
+            self._log_suppressed_dde(orig_expr, fallback_value)
+            return fallback_value
+
         if not ok:
             size_oblivious_result = None
             # compute size_oblivious_result to suggest it as a fix for the user if it works.
-            if not size_oblivious and not self._dde_suppressed:
+            if not size_oblivious:
                 size_oblivious_result = self._maybe_evaluate_static(
                     expr, size_oblivious=True
                 )
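
The performance win comes from the @lru_cache(256) already wrapping _inner_evaluate_expr: functools.lru_cache memoizes return values but never exceptions, so the old raise-and-catch path re-ran the whole evaluation on every repeated guard_or_* call, while a returned fallback_value is cacheable. A toy demonstration of that difference (not PyTorch code):

import functools

calls = 0

@functools.lru_cache(maxsize=256)
def eval_with_raise(expr: str) -> bool:
    global calls
    calls += 1
    raise ValueError("data dependent")  # raised exceptions are never cached

@functools.lru_cache(maxsize=256)
def eval_with_fallback(expr: str, fallback: bool) -> bool:
    global calls
    calls += 1
    return fallback  # return values are memoized

for _ in range(3):
    try:
        eval_with_raise("u0 == 1")
    except ValueError:
        pass
    eval_with_fallback("u0 == 1", False)

print(calls)  # 4: the raising variant ran three times, the caching one once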
