Skip to content

Commit a5e485f

Browse files
authored
[FRONTEND] Fix atomic min/max for float with negative zero (#6431)
Fixes #6376. The software emulation of atomic min/max uses `x >= 0` to test the sign bit, which breaks down when `x` is `-0.0`: it compares equal to zero yet has the sign bit set. I fix this by looking at the bit representation of the float and extracting the sign bit directly. I also fix `not_` raising an error in the interpreter from `get_all_ones_value`, since `np.bool` doesn't have "int" in its name.
1 parent 3eb8501 commit a5e485f

File tree

3 files changed

+34
-8
lines changed

3 files changed

+34
-8
lines changed

python/test/unit/language/test_core.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1851,6 +1851,26 @@ def kernel_r(ptrs, BLOCK_SIZE: tl.constexpr):
18511851
out = kernel_r[(2, )](data, BLOCK_SIZE=block_size, num_ctas=1, launch_cooperative_grid=False)
18521852

18531853

1854+
@pytest.mark.interpreter
1855+
def test_atomic_min_max_neg_zero(device):
1856+
1857+
@triton.jit
1858+
def kernel(inp, out_max, out_min):
1859+
idx = tl.program_id(0)
1860+
x = tl.load(inp + idx)
1861+
tl.atomic_max(out_max + idx, x)
1862+
tl.atomic_min(out_min + idx, x)
1863+
1864+
N_PROG = 1
1865+
dtype = torch.float32
1866+
out_min = torch.full([N_PROG], torch.finfo(torch.float32).max, device=device, dtype=dtype)
1867+
out_max = torch.full([N_PROG], torch.finfo(torch.float32).min, device=device, dtype=dtype)
1868+
inp = torch.full([N_PROG], -0.0, device=device, dtype=dtype)
1869+
kernel[(N_PROG, )](inp, out_max, out_min)
1870+
torch.testing.assert_close(out_min, inp, atol=0, rtol=0)
1871+
torch.testing.assert_close(out_max, inp, atol=0, rtol=0)
1872+
1873+
18541874
# ---------------
18551875
# test cast
18561876
# ---------------

python/triton/language/semantic.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,14 @@ def atom_red_typechecking_impl(ptr: tl.tensor, val: tl.tensor, mask: tl.tensor,
13881388
return ptr, val, mask
13891389

13901390

1391+
def _signbit(x: tl.tensor, builder: ir.builder) -> tl.tensor:
    """Return an i1 tensor that is True where ``x`` has its IEEE-754 sign bit set.

    Unlike an ``x < 0`` comparison, this classifies ``-0.0`` as negative
    (``-0.0 >= 0`` is true even though its sign bit is set), which the
    software emulation of float atomic min/max relies on.
    """
    bitwidth = x.dtype.primitive_bitwidth
    # Reinterpret the float's bits as an unsigned integer of the same width,
    # then logically shift the sign bit (the MSB) down into bit 0.
    idtype = tl.get_int_dtype(bitwidth=bitwidth, signed=False)
    ix = x.to(idtype, bitcast=True, _builder=builder)
    signbit = lshr(ix, bitwidth - 1, builder)
    return signbit.to(tl.int1, _builder=builder)
1397+
1398+
13911399
def atomic_max(ptr: tl.tensor, val: tl.tensor, mask: tl.tensor, sem: str, scope: str, builder: ir.builder) -> tl.tensor:
13921400
ptr, val, mask = atom_red_typechecking_impl(ptr, val, mask, 'max', builder)
13931401
sem = _str_to_sem(sem)
@@ -1407,16 +1415,14 @@ def atomic_max(ptr: tl.tensor, val: tl.tensor, mask: tl.tensor, sem: str, scope:
14071415
if sca_ty not in {tl.float32, tl.float64}:
14081416
raise TypeError(f"atomic_max not supported for dtype {sca_ty}")
14091417

1410-
zero = full([], 0.0, sca_ty, builder)
1411-
14121418
i_type = tl.int32 if sca_ty == tl.float32 else tl.int64
14131419
i_val = bitcast(val, i_type, builder)
14141420
i_ptr = bitcast(ptr, tl.pointer_type(i_type, 1), builder)
14151421
ui_type = tl.uint32 if sca_ty == tl.float32 else tl.uint64
14161422
ui_val = bitcast(val, ui_type, builder)
14171423
ui_ptr = bitcast(ptr, tl.pointer_type(ui_type, 1), builder)
1418-
pos = greater_equal(val, zero, builder)
1419-
neg = less_than(val, zero, builder)
1424+
neg = _signbit(val, builder)
1425+
pos = not_(neg, builder)
14201426
pos_ret = tl.tensor(
14211427
builder.create_atomic_rmw(ir.ATOMIC_OP.MAX, i_ptr.handle, i_val.handle,
14221428
and_(mask, pos, builder).handle, sem, scope), i_val.type)
@@ -1446,16 +1452,14 @@ def atomic_min(ptr: tl.tensor, val: tl.tensor, mask: tl.tensor, sem: str, scope:
14461452
if sca_ty not in {tl.float32, tl.float64}:
14471453
raise TypeError(f"atomic_min not supported for dtype {sca_ty}")
14481454

1449-
zero = full([], 0.0, sca_ty, builder)
1450-
14511455
i_type = tl.int32 if sca_ty == tl.float32 else tl.int64
14521456
i_val = bitcast(val, i_type, builder)
14531457
i_ptr = bitcast(ptr, tl.pointer_type(i_type, 1), builder)
14541458
ui_type = tl.uint32 if sca_ty == tl.float32 else tl.uint64
14551459
ui_val = bitcast(val, ui_type, builder)
14561460
ui_ptr = bitcast(ptr, tl.pointer_type(ui_type, 1), builder)
1457-
pos = greater_equal(val, zero, builder)
1458-
neg = less_than(val, zero, builder)
1461+
neg = _signbit(val, builder)
1462+
pos = not_(neg, builder)
14591463
pos_ret = tl.tensor(
14601464
builder.create_atomic_rmw(ir.ATOMIC_OP.MIN, i_ptr.handle, i_val.handle,
14611465
and_(mask, pos, builder).handle, sem, scope), i_val.type)

python/triton/runtime/interpreter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,8 @@ def get_all_ones_value(self, type):
756756
np_type = _get_np_dtype(type)
757757
if "int" in np_type.name:
758758
return TensorHandle(np.full(1, -1, dtype=np_type), type.scalar)
759+
elif np_type == np.bool_:
760+
return TensorHandle(np.full(1, True, dtype=np_type), type.scalar)
759761
else:
760762
raise TypeError(f"unsupported type {type}")
761763

0 commit comments

Comments
 (0)