diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ddeca07e51103..f26639847be75 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2612,6 +2612,10 @@ def HasPkMinMax3Insts : Predicate<"Subtarget->hasPkMinMax3Insts()">, AssemblerPredicate<(any_of FeatureGFX1250Insts)>; +def HasSGetShaderCyclesInst : + Predicate<"Subtarget->hasSGetShaderCyclesInst()">, + AssemblerPredicate<(any_of FeatureGFX1250Insts)>; + def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">, AssemblerPredicate<(all_of FeatureImageInsts)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index c5bdd28314642..f47ddf5d93ec3 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1562,6 +1562,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions. bool hasPkMinMax3Insts() const { return GFX1250Insts; } + // \returns true if target has S_GET_SHADER_CYCLES_U64 instruction. + bool hasSGetShaderCyclesInst() const { return GFX1250Insts; } + // \returns true if target has S_SETPRIO_INC_WG instruction. 
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; } diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 8303410115f93..431d73b9a95b5 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1653,6 +1653,12 @@ def S_SETPRIO_INC_WG : SOPP_Pseudo <"s_setprio_inc_wg", (ins i16imm:$simm16), "$ let SubtargetPredicate = HasSetPrioIncWgInst; } +def S_GET_SHADER_CYCLES_U64 : SOP1_64_0 <"s_get_shader_cycles_u64", + [(set i64:$sdst, (readcyclecounter))]> { + let SubtargetPredicate = HasSGetShaderCyclesInst; + let hasSideEffects = 1; +} + let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsg:$simm16), "$simm16", [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> { @@ -2145,6 +2151,7 @@ defm S_ALLOC_VGPR : SOP1_Real_gfx12<0x053>; defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>; // GFX1250 +defm S_GET_SHADER_CYCLES_U64 : SOP1_Real_gfx12<0x06>; defm S_ADD_PC_I64 : SOP1_Real_gfx12<0x04b>; //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll index 131c5f31585d8..f67cbe381bfad 100644 --- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -10,6 +10,8 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s declare i64 @llvm.readcyclecounter() #0 @@ -21,6 +23,7 @@ 
declare i64 @llvm.readcyclecounter() #0 ; GFX12: s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI) ; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]] ; GFX12: s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0 +; GFX1250: s_get_shader_cycles_u64 s{{\[[0-9]+:[0-9]+\]}} ; GCN-DAG: kmcnt ; MEMTIME: store_dwordx2 ; SIVI-NOT: kmcnt @@ -53,6 +56,7 @@ define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 { ; GFX12: s_getreg_b32 [[HI1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI) ; GFX12: s_getreg_b32 [[LO1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_LO) ; GFX12: s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI) +; GFX1250: s_get_shader_cycles_u64 s{{\[[0-9]+:[0-9]+\]}} ; GCN-DAG: s_load_{{dword|b32|b64}} ; GETREG-DAG: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_SHADER_CYCLES, 0, 20) ; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s index 41b6e93357a3f..aab8d9a2fcbfd 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s @@ -45,6 +45,10 @@ s_rfe_i64 s[2:3] s_rfe_b64 s[2:3] // GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe] +s_get_shader_cycles_u64 s[2:3] +// GFX1250: s_get_shader_cycles_u64 s[2:3] ; encoding: [0x00,0x06,0x82,0xbe] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + s_barrier_signal -3 // GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt index 83fa647696d6c..07aca1e40b071 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt @@ -12,6 +12,9 @@ # GFX1250: s_add_pc_i64 s[2:3] ; encoding: [0x02,0x4b,0x80,0xbe] 0x02,0x4b,0x80,0xbe +# GFX1250: s_get_shader_cycles_u64 s[2:3] ; encoding: [0x00,0x06,0x82,0xbe] +0x00,0x06,0x82,0xbe + # GFX1250: s_barrier_signal -3 ; encoding: 
[0xc3,0x4e,0x80,0xbe] 0xc3,0x4e,0x80,0xbe