[TESTS] Reducing warning messages in test_core.py (#6869)

Jokeren · web-flow · commit aeb4d4fbe8be · 2025-05-19T14:12:50.000-04:00
diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py
@@ -350,7 +350,8 @@ def kernel(Z, X, SIZE: tl.constexpr):
     kernel = patch_kernel(kernel, {'GENERATE_TEST_HERE': expr})
     # inputs
     x = numpy_random(SIZE, dtype_str=dtype_x)
-    if 'log' in expr:
+    # avoid log/sqrt of negative numbers
+    if 'log' in expr or 'sqrt' in expr:
         x = np.abs(x) + 0.01
     # reference result
     z_ref = eval(expr if numpy_expr is None else numpy_expr)
@@ -1270,7 +1271,7 @@ def kernel():
         a = tl.arange(0, 64).reshape(2, 4, 8).trans((2, 1, 0))
         tl.static_assert(a.shape == [tl.constexpr(8), tl.constexpr(4), tl.constexpr(2)])
 
-        a = tl.arange(0, 64).view(2, 4, 8)
+        a = tl.reshape(tl.arange(0, 64), 2, 4, 8, can_reorder=True)
         tl.static_assert(a.shape == [tl.constexpr(2), tl.constexpr(4), tl.constexpr(8)])
 
     kernel[(1, )]()
@@ -1543,6 +1544,8 @@ def test_atomic_rmw(op, dtype_x_str, mode, sem, device):
     if is_interpreter():
         if dtype_x_str == 'float16' or dtype_x_str == 'bfloat16':
             pytest.skip("Only test atomic bfloat16/float16 ops on GPU")
+    if "uint" in dtype_x_str and mode in ["min_neg", "all_neg"]:
+        pytest.skip("uint cannot be negative")
 
     n_programs = 5
 
@@ -1745,7 +1748,7 @@ def kernel(in_ptr, idx_ptr, out_ptr, shape0, shape1, mask_step, XBLOCK: tl.const
         xoffset = tl.program_id(0) * XBLOCK
         x_idx = xoffset + tl.arange(0, XBLOCK)[:]
         mask = x_idx < shape0 * shape1
-        mask = mask and (x_idx % mask_step != 0)
+        mask = mask & (x_idx % mask_step != 0)
         idx_base = shape1 * (x_idx // shape1)
         idx_offset = tl.load(idx_ptr + x_idx, mask)
         in_elem = tl.load(in_ptr + x_idx, mask)
@@ -2758,7 +2761,7 @@ def kernel(X, Y, Z, BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr, AXIS: tl.const
 
     elif op == 'cummax':
         # NumPy does not have cummax
-        z = z.astype(np.int64)
+        z = np.empty_like(x, dtype=np.int64)
         z_ref = torch.cummax(torch.from_numpy(x_in.copy()), axis=axis).indices.numpy()
         if reverse:
             z_ref = x_in.shape[axis] - np.flip(z_ref, axis) - 1
@@ -7515,6 +7518,7 @@ def g(y, dtype):
 
 
 @pytest.mark.interpreter
+@pytest.mark.filterwarnings("ignore:If conditional called with multidimensional Tensor*")
 def test_unsplat(device):
 
     @triton.jit
diff --git a/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp b/third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp
@@ -431,8 +431,9 @@ struct FpToFpOpConversion
             // mul{.rnd}.bf16 and mul{.rnd}.bf16x2 requires sm_90 or higher.
             {{F8E5M2TyID, BF16TyID, undefRounding},
              Fp8E5M2_to_Bf16(computeCapability >= 90)},
+            // 'cvt with .bf16.f16' requires .target sm_90 or higher
             {{F8E4M3TyID, BF16TyID, undefRounding},
-             Fp8E4M3Nv_to_Bf16(computeCapability >= 89)},
+             Fp8E4M3Nv_to_Bf16(computeCapability >= 90)},
             // BF16 -> F8
             {{BF16TyID, F8E5M2TyID, RoundingMode::RTNE},
              Bf16_to_Fp8E5M2(computeCapability >= 89)},