Skip to content

Commit 6feef10

Browse files
authored
[AMD] Extended FP conversion for gfx1250 (triton-lang#8821)
The GFX1250 architecture comes with new FP conversion instructions that can convert 8x FP32/FP16/BF16 to 8x FP8. This PR extends the AMDGPU backend with support for the new instructions.
1 parent 03a0321 commit 6feef10

File tree

2 files changed

+178
-56
lines changed

2 files changed

+178
-56
lines changed

python/test/unit/language/test_conversions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ def test_typeconvert_downcast_clamping(src_dtype, dst_dtype, mode, device, round
373373
if dst_dtype in ('float8e5', 'float8e4nv') and rounding == 'rtne' and torch.cuda.get_device_capability(0) < (9, 0):
374374
pytest.skip(f"{dst_dtype} downcast with RTNE rounding tests only supported on NVGPU with compute capability 9.0+")
375375

376+
if mode in ('inf', '-inf') and is_hip_gfx12():
377+
pytest.skip(f"clamping from `{mode}` is not supported on AMDGPU GFX12")
378+
376379
converter = {
377380
tl.float8e4nv: torch.float8_e4m3fn,
378381
tl.float8e5: torch.float8_e5m2,

0 commit comments

Comments
 (0)