From 69f8ff4bfa437013250d6218bf8db751b6e74900 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Tue, 1 Jul 2025 11:53:07 -0700 Subject: [PATCH] AMDGPU: Implement tensor_save and tensor_stop for gfx1250 MC layer only. --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 26 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s | 22 ++++++++++++++++ .../AMDGPU/gfx1250_dasm_vflat.txt | 16 ++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 9ae98de039e34..3625db9a4791f 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -392,6 +392,23 @@ multiclass FLAT_Global_Store_AddTid_Pseudo; } +class FLAT_Global_Tensor_Pseudo<string opName, bit EnableSaddr = 0> : FLAT_Pseudo< + opName, + (outs ), + !con(!if(EnableSaddr, (ins SReg_64:$saddr, flat_offset:$offset), (ins )), (ins CPol_0:$cpol)), + !if(EnableSaddr, " $saddr$offset", " ")#"$cpol"> { + + let is_flat_global = 1; + let has_vdst = 0; + let has_data = 0; + let has_vaddr = 0; + let mayLoad = 0; + let mayStore = 1; + let has_saddr = 1; + let enabled_saddr = EnableSaddr; + let has_offset = EnableSaddr; +} + class FLAT_Global_Invalidate_Writeback : FLAT_Pseudo { @@ -1052,6 +1069,12 @@ let SubtargetPredicate = isGFX12Plus in { def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">; } // End SubtargetPredicate = isGFX12Plus +let SubtargetPredicate = isGFX1250Plus in { + +def TENSOR_SAVE : FLAT_Global_Tensor_Pseudo<"tensor_save", 1>; +def TENSOR_STOP : FLAT_Global_Tensor_Pseudo<"tensor_stop">; +} // End SubtargetPredicate = isGFX1250Plus + defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo 
<"scratch_load_ushort", VGPR_32>; @@ -3061,6 +3084,9 @@ multiclass VFLAT_Real_Atomics_gfx1250 op, string name = get_FLAT_ps, VFLAT_Real_SADDR_RTN_gfx1250; +defm TENSOR_SAVE : VFLAT_Real_gfx1250<0x06e>; +defm TENSOR_STOP : VFLAT_Real_gfx1250<0x06f>; + defm GLOBAL_LOAD_TR_B128_w32 : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">; defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">; diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s new file mode 100644 index 0000000000000..07b4055f0ab9c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s @@ -0,0 +1,22 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +tensor_save s[0:1] +// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +tensor_save s[0:1] offset:32 +// GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +tensor_stop +// GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: 
[0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt new file mode 100644 index 0000000000000..6421c6f30e177 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt @@ -0,0 +1,16 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] +0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00] +0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00 + +# GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00