Skip to content

Commit b296ea9

Browse files
authored
[AMDGPU] s_get_shader_cycles_u64 gfx1250 instruction (#152390)
It is the same as reading SHADER_CYCLES_LO and SHADER_CYCLES_HI but with a single instruction.
1 parent f615269 commit b296ea9

File tree

6 files changed

+25
-0
lines changed

6 files changed

+25
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,6 +2612,10 @@ def HasPkMinMax3Insts :
26122612
Predicate<"Subtarget->hasPkMinMax3Insts()">,
26132613
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
26142614

2615+
def HasSGetShaderCyclesInst :
2616+
Predicate<"Subtarget->hasSGetShaderCyclesInst()">,
2617+
AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
2618+
26152619
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
26162620
AssemblerPredicate<(all_of FeatureImageInsts)>;
26172621

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,6 +1562,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
15621562
// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
15631563
bool hasPkMinMax3Insts() const { return GFX1250Insts; }
15641564

1565+
// \returns true if target has S_GET_SHADER_CYCLES_U64 instruction.
1566+
bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1567+
15651568
// \returns true if target has S_SETPRIO_INC_WG instruction.
15661569
bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
15671570

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,12 @@ def S_SETPRIO_INC_WG : SOPP_Pseudo <"s_setprio_inc_wg", (ins i16imm:$simm16), "$
16531653
let SubtargetPredicate = HasSetPrioIncWgInst;
16541654
}
16551655

1656+
def S_GET_SHADER_CYCLES_U64 : SOP1_64_0 <"s_get_shader_cycles_u64",
1657+
[(set i64:$sdst, (readcyclecounter))]> {
1658+
let SubtargetPredicate = HasSGetShaderCyclesInst;
1659+
let hasSideEffects = 1;
1660+
}
1661+
16561662
let Uses = [EXEC, M0] in {
16571663
def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsg:$simm16), "$simm16",
16581664
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> {
@@ -2145,6 +2151,7 @@ defm S_ALLOC_VGPR : SOP1_Real_gfx12<0x053>;
21452151
defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>;
21462152

21472153
// GFX1250
2154+
defm S_GET_SHADER_CYCLES_U64 : SOP1_Real_gfx12<0x06>;
21482155
defm S_ADD_PC_I64 : SOP1_Real_gfx12<0x04b>;
21492156

21502157
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AMDGPU/readcyclecounter.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s
1111
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
1212
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
13+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s
14+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250 %s
1315

1416
declare i64 @llvm.readcyclecounter() #0
1517

@@ -21,6 +23,7 @@ declare i64 @llvm.readcyclecounter() #0
2123
; GFX12: s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
2224
; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]]
2325
; GFX12: s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0
26+
; GFX1250: s_get_shader_cycles_u64 s{{\[[0-9]+:[0-9]+\]}}
2427
; GCN-DAG: kmcnt
2528
; MEMTIME: store_dwordx2
2629
; SIVI-NOT: kmcnt
@@ -53,6 +56,7 @@ define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 {
5356
; GFX12: s_getreg_b32 [[HI1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
5457
; GFX12: s_getreg_b32 [[LO1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_LO)
5558
; GFX12: s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
59+
; GFX1250: s_get_shader_cycles_u64 s{{\[[0-9]+:[0-9]+\]}}
5660
; GCN-DAG: s_load_{{dword|b32|b64}}
5761
; GETREG-DAG: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_SHADER_CYCLES, 0, 20)
5862
; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]]

llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ s_rfe_i64 s[2:3]
4545
s_rfe_b64 s[2:3]
4646
// GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe]
4747

48+
s_get_shader_cycles_u64 s[2:3]
49+
// GFX1250: s_get_shader_cycles_u64 s[2:3] ; encoding: [0x00,0x06,0x82,0xbe]
50+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
51+
4852
s_barrier_signal -3
4953
// GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe]
5054

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
# GFX1250: s_add_pc_i64 s[2:3] ; encoding: [0x02,0x4b,0x80,0xbe]
1313
0x02,0x4b,0x80,0xbe
1414

15+
# GFX1250: s_get_shader_cycles_u64 s[2:3] ; encoding: [0x00,0x06,0x82,0xbe]
16+
0x00,0x06,0x82,0xbe
17+
1518
# GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe]
1619
0xc3,0x4e,0x80,0xbe
1720

0 commit comments

Comments
 (0)