Commit 6a3027a

[FRONTEND] Better handling of boolean operators and scalars (#6769)
This commit contains the following frontend enhancements:

1. `tl.reshape(x, [])` works for single-element tensors and returns a scalar. Previously, it was very difficult to produce scalars, and `tl.reshape(x, [])` would throw an error.
2. `if`-statements correctly throw frontend errors if used on multiple-element tensors; if a multidimensional single-element tensor is provided, we raise a warning (recommending `tl.reshape(x, [])`) and unsplat it to a scalar. Before, the backend would crash in both cases with an inscrutable MLIR error.
3. Chained boolean operations such as `(P or Q or R)` are supported, whereas before they would throw frontend errors.
4. In boolean operations, operands with constexpr truth values are handled specially:
   - if we are in a conjunction and encounter a constexpr falsey operand, we short-circuit and return it;
   - if we are in a disjunction and encounter a constexpr truthy operand, we short-circuit and return it;
   - other constexpr operands are ignored completely (they do not participate in the result).

The last of these enhancements allows one to write things such as:

```
if (x is not None) and (x.dtype == tl.int32):
    ...
```

which would previously have failed, as Triton would have tried to compute both operands (the latter yielding an error) before taking their conjunction.
1 parent 0e54ff1 · commit 6a3027a
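Taken together, items 1 and 3 make it possible to branch on a single loaded value inside a kernel. A minimal sketch of the newly supported pattern (the kernel and its argument names are illustrative, not part of this diff):

```python
import triton
import triton.language as tl


@triton.jit
def flag_kernel(x_ptr, out_ptr):
    # tl.load over a one-element range yields a tensor of shape [1] ...
    v = tl.load(x_ptr + tl.arange(0, 1))
    # ... which tl.reshape(v, []) now turns into a true scalar (item 1):
    v = tl.reshape(v, [])
    # chained boolean operators no longer throw a frontend error (item 3):
    if v < 0 or v > 100 or v == 42:
        tl.store(out_ptr, 1)
```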

File tree

3 files changed (+149 lines, -10 lines)

python/test/unit/language/test_core.py

Lines changed: 76 additions & 0 deletions
```diff
@@ -7486,3 +7486,79 @@ def _namedtuple_float_tuple_kernel():
         x, y = float('-inf'), float('inf')  # noqa: F841
 
     _namedtuple_float_tuple_kernel[(1, )]()
+
+
+@pytest.mark.interpreter
+def test_short_circuiting(device):
+
+    @triton.jit
+    def short_circuiting_kernel(x):
+        if (x is not None) and hasattr(x, "dtype") and isinstance(
+                x.dtype, tl.pointer_type) and (x.dtype.element_ty == tl.int32) and (tl.load(x) > 42):
+            tl.store(x, 42)
+
+    def f(x):
+        short_circuiting_kernel[(1, )](x, num_warps=1)
+
+    f(None)  # should succeed with NoneType
+    f(1)  # should succeed with tl.constexpr type
+    f(2)  # should succeed with integer type
+
+    def g(y, dtype):
+        x = torch.full((1, ), y, device=device, dtype=dtype)
+        f(x)
+        return x.item()
+
+    assert g(37.5, torch.float32) == 37.5
+    assert g(84.0, torch.float32) == 84.0
+    assert g(-76893, torch.int32) == -76893
+    assert g(100000, torch.int32) == 42
+    assert g(100000, torch.int64) == 100000
+
+
+@pytest.mark.interpreter
+def test_unsplat(device):
+
+    @triton.jit
+    def unsplat_kernel(x, explicit: tl.constexpr):
+
+        # this is a single-element tensor:
+        condition = tl.load(x + tl.arange(0, 1)) > 42
+
+        if explicit:
+            condition = condition.reshape([])
+
+        if condition:
+            tl.store(x, 42)
+
+    def g(y, explicit):
+        x = torch.full((1, ), y, device=device, dtype=torch.int32)
+        unsplat_kernel[(1, )](x, explicit, num_warps=1)
+        return x.item()
+
+    assert g(41, False) == 41
+    assert g(43, False) == 42
+    assert g(41, True) == 41
+    assert g(43, True) == 42
+
+
+@pytest.mark.interpreter
+def test_tuple_logic():
+
+    @triton.jit
+    def tuple_logic_kernel():
+
+        # arity-2 BoolOps:
+        tl.static_assert(((3, 4) or (5, 6)) == (3, 4))
+        tl.static_assert(((3, 4) and (5, 6)) == (5, 6))
+        tl.static_assert(((3, 4) and ()) == ())
+        tl.static_assert((() or (5, 6)) == (5, 6))
+
+        # arity-3 BoolOps:
+        tl.static_assert(((1, 2) and (3, 4) and (5, 6)) == (5, 6))
+        tl.static_assert(((1, 2) or (3, 4) or (5, 6)) == (1, 2))
+
+        # constexpr short-circuiting over dynamic argument:
+        tl.static_assert((() and tl.program_id(0)) == ())
+
+    tuple_logic_kernel[(1, )]()
```
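Worth spelling out why the final `torch.int64` case in `test_short_circuiting` is left untouched: every guard before `tl.load(x) > 42` evaluates to a compile-time constant, so the new `visit_BoolOp` short-circuits before the load is ever visited. Roughly (an annotated walkthrough, not emitted code):

```python
# specialization of short_circuiting_kernel for an int64 pointer argument:
#   (x is not None)                      -> constexpr True   (dropped from the chain)
#   hasattr(x, "dtype")                  -> constexpr True   (dropped)
#   isinstance(x.dtype, tl.pointer_type) -> constexpr True   (dropped)
#   x.dtype.element_ty == tl.int32       -> constexpr False  (conjunction short-circuits here)
#   tl.load(x) > 42                      -> never visited; no load is emitted
```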

python/triton/compiler/code_generator.py

Lines changed: 52 additions & 7 deletions
```diff
@@ -324,7 +324,10 @@ def __init__(self, context, prototype, gscope, function_name, jit_fn: JITFunction
         # special handling.
         self.visiting_arg_default_value = False
 
-        builtin_namespace: Dict[str, Any] = {_.__name__: _ for _ in (len, list, range, float, int, isinstance, getattr)}
+        builtin_namespace: Dict[str, Any] = {
+            _.__name__: _
+            for _ in (len, list, range, float, int, isinstance, getattr, hasattr)
+        }
         builtin_namespace.update((
             ('print', language.core.device_print),
             ('min', language.minimum),
```
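A note on why `hasattr` joins this namespace: inside a `@triton.jit` function it is evaluated at compile time against the Python-level object backing the argument, so it returns a plain `bool`; the reworked `visit_BoolOp` below then treats that `bool` as a constexpr operand, which is what lets guards like `hasattr(x, "dtype")` in the new tests fold away instead of reaching the backend.
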
```diff
@@ -766,6 +769,13 @@ def visit_If(self, node):
         cond = self.visit(node.test)
 
         if _is_triton_tensor(cond):
+            if _is_non_scalar_tensor(cond):
+                raise self._unsupported(node, "Boolean value of Tensor with more than one value is ambiguous")
+            if cond.type.is_block():
+                warnings.warn(
+                    "If conditional called with multidimensional Tensor instead of scalar; please use \"if (%s).reshape([])\" instead"
+                    % ast.unparse(node.test))
+                cond = language.core._unsplat(cond, _builder=self.builder, _generator=self)
             cond = cond.to(language.int1, _builder=self.builder)
         contains_return = ContainsReturnChecker(self.gscope).visit(node)
         if contains_return:
```
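A sketch of the new `visit_If` behaviour (hypothetical kernel, not from this commit):

```python
@triton.jit
def cond_kernel(x_ptr):
    many = tl.load(x_ptr + tl.arange(0, 8)) > 0  # shape [8]
    one = tl.load(x_ptr + tl.arange(0, 1)) > 0   # shape [1]
    # `if many:` now raises a frontend error ("Boolean value of Tensor with
    # more than one value is ambiguous") instead of crashing in MLIR.
    if one:  # warns, then unsplats the [1]-shaped condition to a scalar
        tl.store(x_ptr, 0)
    if one.reshape([]):  # the warning-free spelling recommended above
        tl.store(x_ptr, 0)
```
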
```diff
@@ -876,6 +886,8 @@ def visit_UnaryOp(self, node):
         try:
             return getattr(operand, fn)()
         except AttributeError:
+            if fn == "__not__":
+                return constexpr(not operand)
             raise self._unsupported(
                 node, f"AST unary operator '{fn}' is not (currently) implemented on type {type(operand).__name__}")
 
```
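This fallback makes `not` usable on Python values that lack a `__not__` attribute, folding the result to a constexpr at compile time; illustratively (hedged, assuming the operands reach `visit_UnaryOp` as plain Python values):

```python
# inside a @triton.jit kernel, these should now fold at compile time:
#   not ()      -> constexpr(True)
#   not (3, 4)  -> constexpr(False)
#   not None    -> constexpr(True)
```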

```diff
@@ -1264,16 +1276,49 @@ def visit_Constant(self, node):
         return constexpr(node.value)
 
     def visit_BoolOp(self, node: ast.BoolOp):
-        if len(node.values) != 2:
-            raise self._unsupported(
-                node, "chained boolean operators (A or B or C) are not supported; use parentheses to split the chain.")
-        lhs = self.visit(node.values[0])
-        rhs = self.visit(node.values[1])
         method_name = self._method_name_for_bool_op.get(type(node.op))
         if method_name is None:
             raise self._unsupported(
                 node, "AST boolean operator '{}' is not (currently) implemented.".format(node.op.__name__))
-        return self._apply_binary_method(method_name, lhs, rhs)
+
+        nontrivial_values = []
+
+        for subnode in node.values:
+            # we visit the values in order, executing their side-effects
+            # and possibly early-exiting:
+            value = self.visit(subnode)
+            if not _is_triton_tensor(value):
+                # this is a constexpr, so we might be able to short-circuit:
+                bv = bool(value)
+                if (bv is False) and (method_name == "logical_and"):
+                    # value is falsey so return that:
+                    return value
+                if (bv is True) and (method_name == "logical_or"):
+                    # value is truthy so return that:
+                    return value
+                # otherwise, our constexpr has no effect on the output of the
+                # expression so we do not append it to nontrivial_values.
+            else:
+                if value.type.is_block():
+                    warnings.warn(
+                        "Logical operators 'and' and 'or' are deprecated for non-scalar tensors; please use '&' or '|' instead"
+                    )
+                # not a constexpr so we must append it:
+                nontrivial_values.append(value)
+
+        if len(nontrivial_values) == 0:
+            # the semantics of a disjunction of falsey values or conjunction
+            # of truthy values is to return the final value:
+            nontrivial_values.append(value)
+
+        while len(nontrivial_values) >= 2:
+            rhs = nontrivial_values.pop()
+            lhs = nontrivial_values.pop()
+            res = self._apply_binary_method(method_name, lhs, rhs)
+            nontrivial_values.append(res)
+
+        assert len(nontrivial_values) == 1
+        return nontrivial_values[0]
 
     _method_name_for_bool_op: Dict[Type[ast.boolop], str] = {ast.And: 'logical_and', ast.Or: 'logical_or'}
```
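Note that the closing while-loop folds `nontrivial_values` from the right, so a fully dynamic chain such as `A and B and C` lowers to nested binary calls; schematically:

```python
# nontrivial_values == [A, B, C] after the visiting loop; folding yields:
#   logical_and(A, logical_and(B, C))
# every operand is still visited in source order (side-effects happen),
# but constexpr operands never appear in the folded expression.
```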

python/triton/language/core.py

Lines changed: 21 additions & 3 deletions
```diff
@@ -1718,6 +1718,22 @@ def _take_first(a, b):
     return a
 
 
+def _unsplat(x, _builder=None, _generator=None):
+    """
+    Convert a single-element tensor to a scalar.
+    """
+    if len(x.shape) == 0:
+        return x
+    numel = 1
+    for d in x.shape:
+        numel *= d
+    assert numel == 1, "can only unsplat single-element tensors"
+    if len(x.shape) >= 2:
+        x = semantic.reshape(x, [1], builder=_builder)
+    x = typing.cast(tensor, reduce(x, 0, _take_first, _builder=_builder, _generator=_generator))
+    return x
+
+
 @_tensor_member_fn
 @builtin
 def split(a, _builder=None, _generator=None) -> tuple[tensor, tensor]:
```
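`_unsplat` normalizes any single-element tensor to rank 0 in at most two steps: a rank-2-or-higher shape is first flattened to `[1]`, and `reduce` along axis 0 with `_take_first` then strips the final dimension. A shape walkthrough (illustrative):

```python
# [1, 1, 1] --semantic.reshape--> [1] --reduce(0, _take_first)--> []  (scalar)
# [1]       ---------------------------reduce(0, _take_first)--> []  (scalar)
# []        returned unchanged
```
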
```diff
@@ -1747,8 +1763,8 @@ def split(a, _builder=None, _generator=None) -> tuple[tensor, tensor]:
 
     if was_rank_1:
         # Currently `reduce` is the best way to convert a tensor of shape [1] to a scalar.
-        out_lhs = typing.cast(tensor, reduce(out_lhs, None, _take_first, _builder=_builder, _generator=_generator))
-        out_rhs = typing.cast(tensor, reduce(out_rhs, None, _take_first, _builder=_builder, _generator=_generator))
+        out_lhs = _unsplat(out_lhs, _builder, _generator)
+        out_rhs = _unsplat(out_rhs, _builder, _generator)
 
     return out_lhs, out_rhs
```

```diff
@@ -1777,7 +1793,7 @@ def view(input, *shape, _builder=None):
 
 @_tensor_member_fn
 @builtin
-def reshape(input, *shape, can_reorder=False, _builder=None):
+def reshape(input, *shape, can_reorder=False, _builder=None, _generator=None):
     """
     Returns a tensor with the same number of elements as input but with the
     provided shape.
```
```diff
@@ -1793,6 +1809,8 @@ def reshape(input, *shape, can_reorder=False, _builder=None):
         reshape(x, 32, 32)
     """
     shape = _shape_check_impl(_unwrap_iterable(shape))
+    if len(shape) == 0:
+        return _unsplat(input, _builder=_builder, _generator=_generator)
     return semantic.reshape(input, shape, can_reorder, _builder)
 
```
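With the empty-shape fast path in place, both spellings below should produce a scalar inside a kernel (hedged example; `x` is assumed to be a single-element tensor of shape `[1]` or `[1, 1]`, etc.):

```python
s1 = tl.reshape(x, [])  # routed through the new len(shape) == 0 branch -> _unsplat
s2 = x.reshape([])      # member-function form (via @_tensor_member_fn), same path
```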
