
Commit fa15dc1

[TEST][intel] Port triton_kernels
Signed-off-by: Whitney Tsang <[email protected]>
1 parent: 9084270

File tree: 3 files changed (+5, -8 lines)


python/triton_kernels/tests/test_distributed.py

Lines changed: 2 additions & 3 deletions

@@ -53,7 +53,7 @@ def _distributed_worker(rank, fn, world_size, kwargs):
 def distributed_launcher(request):
     n_gpus = getattr(request, "param", None)
     if not torch.cuda.is_available():
-        pytest.skip("CUDA required for distributed GPU test")
+        pytest.xfail("CUDA required for distributed GPU test")
     if torch.cuda.device_count() < n_gpus:
         pytest.skip(f"requires up to {n_gpus} CUDA devices, found {torch.cuda.device_count()}")

@@ -82,8 +82,7 @@ def launch(fn, **kwargs):

 @pytest.mark.parametrize("n_expts_shard, n_expts_tot", [(8, 512), (16, 64)])
 @pytest.mark.parametrize("affinity_mode", ["uniform", "random"])
-def test_make_expt_assignment(n_expts_shard, n_expts_tot, affinity_mode):
-    device = "cuda"
+def test_make_expt_assignment(n_expts_shard, n_expts_tot, affinity_mode, device):
     expt_dict = _make_expt_dict_for_mode(n_expts_shard, n_expts_tot, affinity_mode)
     expt_assignment = make_expt_assignment(n_expts_shard, n_expts_tot, expt_dict, device)
     # mask correctness & uniqueness: each expert set exactly once, and on the right shard
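
Note: the test now takes device as a parameter instead of hard-coding device = "cuda", which presumably comes from a pytest fixture so the suite can target other backends (the [intel] tag suggests XPU). A minimal sketch of how such a fixture could be defined in conftest.py; the option name and default here are hypothetical and may not match the repository's actual wiring:

import pytest

def pytest_addoption(parser):
    # Hypothetical CLI option; would let a run specify, e.g., --device xpu.
    parser.addoption("--device", action="store", default="cuda",
                     help="device on which to run the tests")

@pytest.fixture
def device(request):
    # Injected into any test that declares a `device` parameter.
    return request.config.getoption("--device")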

python/triton_kernels/tests/test_reduce.py

Lines changed: 2 additions & 3 deletions

@@ -56,13 +56,12 @@ def plus_a(x, a):
     "broadcast_n",  # broadcast over N: [B,M,1]
 ])
 @pytest.mark.parametrize("dim", [0, 1, 2])
-def test_op(B, M, N, dtype_str, dim, mask_mode, postprocess_fn):
+def test_op(B, M, N, dtype_str, dim, mask_mode, postprocess_fn, device):
     is_hip = triton.runtime.driver.active.get_current_target().backend == "hip"
-    is_pre_h100 = torch.cuda.get_device_capability() < (9, 0)
+    is_pre_h100 = torch.cuda.is_available() and torch.cuda.get_device_capability() < (9, 0)
     if (is_hip or is_pre_h100) and "float8" in dtype_str:
         pytest.skip("float8 not supported on CUDA < 9.0")
     torch.manual_seed(0)
-    device = "cuda"
     x = torch.randn((B, M, N), device=device, dtype=torch.float32)
     x_mscale, x_flex = None, None
     y_flex_tri, y_flex_ref = None, None
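
The is_available() guard matters because torch.cuda.get_device_capability() initializes the CUDA runtime and raises on hosts without a CUDA device; short-circuiting lets the capability check evaluate safely on non-CUDA backends. A small illustrative helper (not from the commit) capturing the pattern:

import torch

def cuda_capability_below(major, minor):
    # Never touch the CUDA runtime unless a CUDA device actually exists.
    if not torch.cuda.is_available():
        return False
    return torch.cuda.get_device_capability() < (major, minor)

# Equivalent to the guarded is_pre_h100 check added above.
is_pre_h100 = cuda_capability_below(9, 0)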

python/triton_kernels/tests/test_tensor.py

Lines changed: 1 addition & 2 deletions

@@ -28,8 +28,7 @@ def test_make_ragged_tensor_metadata(n_slices, device):


 @pytest.mark.parametrize("n_slices", [9, 32, 911, 1025])
-def test_remap_ragged_tensor_metadata(n_slices):
-    device = "cuda"
+def test_remap_ragged_tensor_metadata(n_slices, device):
     max_slice_size = 200
     n_total_rows = max_slice_size * n_slices
     slice_sizes = torch.randint(0, max_slice_size, (n_slices, ), dtype=torch.int32, device=device)
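
With all three files parameterized on device, the same tests can be pointed at a different backend without source edits. Assuming the hypothetical --device option sketched earlier, a run against Intel XPU might look like:

pytest python/triton_kernels/tests/test_tensor.py --device xpu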
