Skip to content

Commit 0263c15

Browse files
[MLIR][NVVM] Add globaltimer_lo support in NVVM Dialect and NVPTX backend (#154672)
This patch adds support for reading the low 32 bits of the global timer (the PTX `%globaltimer_lo` special register) in the NVVM dialect and the NVPTX backend. It adds the `NVVM_GlobalTimerLoOp` operation to the NVVM dialect and the `int_nvvm_read_ptx_sreg_globaltimer_lo` intrinsic, together with its lowering in the NVPTX backend. Lit tests covering both the MLIR translation and the NVPTX code generation have been added.
1 parent f93f6e5 commit 0263c15

File tree

5 files changed

+67
-44
lines changed

5 files changed

+67
-44
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1943,6 +1943,7 @@ def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
19431943
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
19441944

19451945
def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
1946+
def int_nvvm_read_ptx_sreg_globaltimer_lo : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
19461947

19471948
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
19481949
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4358,10 +4358,12 @@ let hasSideEffects = 1 in {
43584358
def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
43594359
def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
43604360
def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
4361+
def SREG_GLOBALTIMER_LO : PTX_READ_SREG_R32<"globaltimer_lo", int_nvvm_read_ptx_sreg_globaltimer_lo>;
43614362
}
43624363

43634364
def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>;
43644365
def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>;
4366+
def: Pat <(i32 (readsteadycounter)), (SREG_GLOBALTIMER_LO)>;
43654367

43664368
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
43674369
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;

llvm/test/CodeGen/NVPTX/intrinsics.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,23 @@ define i64 @test_globaltimer() {
267267
ret i64 %ret
268268
}
269269

270+
define i32 @test_globaltimer_lo(){
271+
; CHECK-LABEL: test_globaltimer_lo(
272+
; CHECK: {
273+
; CHECK-NEXT: .reg .b32 %r<4>;
274+
; CHECK-EMPTY:
275+
; CHECK-NEXT: // %bb.0:
276+
; CHECK-NEXT: mov.u32 %r1, %globaltimer_lo;
277+
; CHECK-NEXT: mov.u32 %r2, %globaltimer_lo;
278+
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
279+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
280+
; CHECK-NEXT: ret;
281+
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
282+
%b = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
283+
%ret = add i32 %a, %b
284+
ret i32 %ret
285+
}
286+
270287
define i64 @test_cyclecounter() {
271288
; CHECK-LABEL: test_cyclecounter(
272289
; CHECK: {

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster
267267
def NVVM_ClockOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock">;
268268
def NVVM_Clock64Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock64">;
269269
def NVVM_GlobalTimerOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer">;
270+
def NVVM_GlobalTimerLoOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer.lo">;
270271

271272
//===----------------------------------------------------------------------===//
272273
// envreg registers

mlir/test/Target/LLVMIR/nvvmir.mlir

Lines changed: 46 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -64,92 +64,94 @@ llvm.func @nvvm_special_regs() -> i32 {
6464
%30 = nvvm.read.ptx.sreg.clock64 : i64
6565
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer
6666
%31 = nvvm.read.ptx.sreg.globaltimer : i64
67-
// CHECK: %32 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
68-
%32 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
67+
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
68+
%32 = nvvm.read.ptx.sreg.globaltimer.lo : i32
69+
// CHECK: %33 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
70+
%33 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
6971
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid
70-
%33 = nvvm.read.ptx.sreg.warpid : i32
72+
%34 = nvvm.read.ptx.sreg.warpid : i32
7173
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid
72-
%34 = nvvm.read.ptx.sreg.nwarpid : i32
74+
%35 = nvvm.read.ptx.sreg.nwarpid : i32
7375
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid
74-
%35 = nvvm.read.ptx.sreg.smid : i32
76+
%36 = nvvm.read.ptx.sreg.smid : i32
7577
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid
76-
%36 = nvvm.read.ptx.sreg.nsmid : i32
78+
%37 = nvvm.read.ptx.sreg.nsmid : i32
7779
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid
78-
%37 = nvvm.read.ptx.sreg.gridid : i32
80+
%38 = nvvm.read.ptx.sreg.gridid : i32
7981
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg0
80-
%38 = nvvm.read.ptx.sreg.envreg0 : i32
82+
%39 = nvvm.read.ptx.sreg.envreg0 : i32
8183
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg1
82-
%39 = nvvm.read.ptx.sreg.envreg1 : i32
84+
%40 = nvvm.read.ptx.sreg.envreg1 : i32
8385
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg2
84-
%40 = nvvm.read.ptx.sreg.envreg2 : i32
86+
%41 = nvvm.read.ptx.sreg.envreg2 : i32
8587
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg3
86-
%41 = nvvm.read.ptx.sreg.envreg3 : i32
88+
%42 = nvvm.read.ptx.sreg.envreg3 : i32
8789
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg4
88-
%42 = nvvm.read.ptx.sreg.envreg4 : i32
90+
%43 = nvvm.read.ptx.sreg.envreg4 : i32
8991
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg5
90-
%43 = nvvm.read.ptx.sreg.envreg5 : i32
92+
%44 = nvvm.read.ptx.sreg.envreg5 : i32
9193
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg6
92-
%44 = nvvm.read.ptx.sreg.envreg6 : i32
94+
%45 = nvvm.read.ptx.sreg.envreg6 : i32
9395
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg7
94-
%45 = nvvm.read.ptx.sreg.envreg7 : i32
96+
%46 = nvvm.read.ptx.sreg.envreg7 : i32
9597
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg8
96-
%46 = nvvm.read.ptx.sreg.envreg8 : i32
98+
%47 = nvvm.read.ptx.sreg.envreg8 : i32
9799
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg9
98-
%47 = nvvm.read.ptx.sreg.envreg9 : i32
100+
%48 = nvvm.read.ptx.sreg.envreg9 : i32
99101
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg10
100-
%48 = nvvm.read.ptx.sreg.envreg10 : i32
102+
%49 = nvvm.read.ptx.sreg.envreg10 : i32
101103
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg11
102-
%49 = nvvm.read.ptx.sreg.envreg11 : i32
104+
%50 = nvvm.read.ptx.sreg.envreg11 : i32
103105
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg12
104-
%50 = nvvm.read.ptx.sreg.envreg12 : i32
106+
%51 = nvvm.read.ptx.sreg.envreg12 : i32
105107
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg13
106-
%51 = nvvm.read.ptx.sreg.envreg13 : i32
108+
%52 = nvvm.read.ptx.sreg.envreg13 : i32
107109
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg14
108-
%52 = nvvm.read.ptx.sreg.envreg14 : i32
110+
%53 = nvvm.read.ptx.sreg.envreg14 : i32
109111
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg15
110-
%53 = nvvm.read.ptx.sreg.envreg15 : i32
112+
%54 = nvvm.read.ptx.sreg.envreg15 : i32
111113
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg16
112-
%54 = nvvm.read.ptx.sreg.envreg16 : i32
114+
%55 = nvvm.read.ptx.sreg.envreg16 : i32
113115
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg17
114-
%55 = nvvm.read.ptx.sreg.envreg17 : i32
116+
%56 = nvvm.read.ptx.sreg.envreg17 : i32
115117
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg18
116-
%56 = nvvm.read.ptx.sreg.envreg18 : i32
118+
%57 = nvvm.read.ptx.sreg.envreg18 : i32
117119
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg19
118-
%57 = nvvm.read.ptx.sreg.envreg19 : i32
120+
%58 = nvvm.read.ptx.sreg.envreg19 : i32
119121
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg20
120-
%58 = nvvm.read.ptx.sreg.envreg20 : i32
122+
%59 = nvvm.read.ptx.sreg.envreg20 : i32
121123
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg21
122-
%59 = nvvm.read.ptx.sreg.envreg21 : i32
124+
%60 = nvvm.read.ptx.sreg.envreg21 : i32
123125
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg22
124-
%60 = nvvm.read.ptx.sreg.envreg22 : i32
126+
%61 = nvvm.read.ptx.sreg.envreg22 : i32
125127
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg23
126-
%61 = nvvm.read.ptx.sreg.envreg23 : i32
128+
%62 = nvvm.read.ptx.sreg.envreg23 : i32
127129
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg24
128-
%62 = nvvm.read.ptx.sreg.envreg24 : i32
130+
%63 = nvvm.read.ptx.sreg.envreg24 : i32
129131
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg25
130-
%63 = nvvm.read.ptx.sreg.envreg25 : i32
132+
%64 = nvvm.read.ptx.sreg.envreg25 : i32
131133
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg26
132-
%64 = nvvm.read.ptx.sreg.envreg26 : i32
134+
%65 = nvvm.read.ptx.sreg.envreg26 : i32
133135
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg27
134-
%65 = nvvm.read.ptx.sreg.envreg27 : i32
136+
%66 = nvvm.read.ptx.sreg.envreg27 : i32
135137
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg28
136-
%66 = nvvm.read.ptx.sreg.envreg28 : i32
138+
%67 = nvvm.read.ptx.sreg.envreg28 : i32
137139
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg29
138-
%67 = nvvm.read.ptx.sreg.envreg29 : i32
140+
%68 = nvvm.read.ptx.sreg.envreg29 : i32
139141
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg30
140-
%68 = nvvm.read.ptx.sreg.envreg30 : i32
142+
%69 = nvvm.read.ptx.sreg.envreg30 : i32
141143
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg31
142-
%69 = nvvm.read.ptx.sreg.envreg31 : i32
144+
%70 = nvvm.read.ptx.sreg.envreg31 : i32
143145
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq
144-
%70 = nvvm.read.ptx.sreg.lanemask.eq : i32
146+
%71 = nvvm.read.ptx.sreg.lanemask.eq : i32
145147
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le
146-
%71 = nvvm.read.ptx.sreg.lanemask.le : i32
148+
%72 = nvvm.read.ptx.sreg.lanemask.le : i32
147149
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt
148-
%72 = nvvm.read.ptx.sreg.lanemask.lt : i32
150+
%73 = nvvm.read.ptx.sreg.lanemask.lt : i32
149151
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge
150-
%73 = nvvm.read.ptx.sreg.lanemask.ge : i32
152+
%74 = nvvm.read.ptx.sreg.lanemask.ge : i32
151153
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
152-
%74 = nvvm.read.ptx.sreg.lanemask.gt : i32
154+
%75 = nvvm.read.ptx.sreg.lanemask.gt : i32
153155
llvm.return %1 : i32
154156
}
155157

0 commit comments

Comments
 (0)