@@ -350,7 +350,8 @@ def kernel(Z, X, SIZE: tl.constexpr):
350
350
kernel = patch_kernel (kernel , {'GENERATE_TEST_HERE' : expr })
351
351
# inputs
352
352
x = numpy_random (SIZE , dtype_str = dtype_x )
353
- if 'log' in expr :
353
+ # avoid log/sqrt of negative numbers
354
+ if 'log' in expr or 'sqrt' in expr :
354
355
x = np .abs (x ) + 0.01
355
356
# reference result
356
357
z_ref = eval (expr if numpy_expr is None else numpy_expr )
@@ -1270,7 +1271,7 @@ def kernel():
1270
1271
a = tl .arange (0 , 64 ).reshape (2 , 4 , 8 ).trans ((2 , 1 , 0 ))
1271
1272
tl .static_assert (a .shape == [tl .constexpr (8 ), tl .constexpr (4 ), tl .constexpr (2 )])
1272
1273
1273
- a = tl .arange (0 , 64 ). view ( 2 , 4 , 8 )
1274
+ a = tl .reshape ( tl . arange (0 , 64 ), 2 , 4 , 8 , can_reorder = True )
1274
1275
tl .static_assert (a .shape == [tl .constexpr (2 ), tl .constexpr (4 ), tl .constexpr (8 )])
1275
1276
1276
1277
kernel [(1 , )]()
@@ -1543,6 +1544,8 @@ def test_atomic_rmw(op, dtype_x_str, mode, sem, device):
1543
1544
if is_interpreter ():
1544
1545
if dtype_x_str == 'float16' or dtype_x_str == 'bfloat16' :
1545
1546
pytest .skip ("Only test atomic bfloat16/float16 ops on GPU" )
1547
+ if "uint" in dtype_x_str and mode in ["min_neg" , "all_neg" ]:
1548
+ pytest .skip ("uint cannot be negative" )
1546
1549
1547
1550
n_programs = 5
1548
1551
@@ -1745,7 +1748,7 @@ def kernel(in_ptr, idx_ptr, out_ptr, shape0, shape1, mask_step, XBLOCK: tl.const
1745
1748
xoffset = tl .program_id (0 ) * XBLOCK
1746
1749
x_idx = xoffset + tl .arange (0 , XBLOCK )[:]
1747
1750
mask = x_idx < shape0 * shape1
1748
- mask = mask and (x_idx % mask_step != 0 )
1751
+ mask = mask & (x_idx % mask_step != 0 )
1749
1752
idx_base = shape1 * (x_idx // shape1 )
1750
1753
idx_offset = tl .load (idx_ptr + x_idx , mask )
1751
1754
in_elem = tl .load (in_ptr + x_idx , mask )
@@ -2758,7 +2761,7 @@ def kernel(X, Y, Z, BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr, AXIS: tl.const
2758
2761
2759
2762
elif op == 'cummax' :
2760
2763
# NumPy does not have cummax
2761
- z = z . astype ( np .int64 )
2764
+ z = np . empty_like ( x , dtype = np .int64 )
2762
2765
z_ref = torch .cummax (torch .from_numpy (x_in .copy ()), axis = axis ).indices .numpy ()
2763
2766
if reverse :
2764
2767
z_ref = x_in .shape [axis ] - np .flip (z_ref , axis ) - 1
@@ -7515,6 +7518,7 @@ def g(y, dtype):
7515
7518
7516
7519
7517
7520
@pytest .mark .interpreter
7521
+ @pytest .mark .filterwarnings ("ignore:If conditional called with multidimensional Tensor*" )
7518
7522
def test_unsplat (device ):
7519
7523
7520
7524
@triton .jit
0 commit comments