Skip to content

Commit e661957

Browse files
committed
pre-commit tests -- use update_llc_test_checks.py
1 parent 077497d commit e661957

File tree

2 files changed

+277
-116
lines changed

2 files changed

+277
-116
lines changed

llvm/test/CodeGen/NVPTX/ctlz.ll

Lines changed: 115 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
23
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
34

@@ -10,67 +11,95 @@ declare i64 @llvm.ctlz.i64(i64, i1) readnone
1011
; There should be no difference between llvm.ctlz.i32(%a, true) and
1112
; llvm.ctlz.i32(%a, false), as ptx's clz(0) is well defined: it returns the
; operand width (32 for clz.b32), which is what llvm.ctlz requires for zero.
1213

13-
; CHECK-LABEL: myctlz(
1414
; Test: llvm.ctlz.i32 called with its i1 flag set to false and the result
; returned as i32. The expected PTX is a single clz.b32 between the parameter
; load and the return-value store.
define i32 @myctlz(i32 %a) {
15-
; CHECK: ld.param.
16-
; CHECK-NEXT: clz.b32
17-
; CHECK-NEXT: st.param.
18-
; CHECK-NEXT: ret;
15+
; CHECK-LABEL: myctlz(
16+
; CHECK: {
17+
; CHECK-NEXT: .reg .b32 %r<3>;
18+
; CHECK-EMPTY:
19+
; CHECK-NEXT: // %bb.0:
20+
; CHECK-NEXT: ld.param.u32 %r1, [myctlz_param_0];
21+
; CHECK-NEXT: clz.b32 %r2, %r1;
22+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
23+
; CHECK-NEXT: ret;
1924
%val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
2025
ret i32 %val
2126
}
22-
; CHECK-LABEL: myctlz_2(
2327
; Test: llvm.ctlz.i32 called with its i1 flag set to true and the result
; returned as i32. The expected PTX is again a single clz.b32 between the
; parameter load and the return-value store.
define i32 @myctlz_2(i32 %a) {
24-
; CHECK: ld.param.
25-
; CHECK-NEXT: clz.b32
26-
; CHECK-NEXT: st.param.
27-
; CHECK-NEXT: ret;
28+
; CHECK-LABEL: myctlz_2(
29+
; CHECK: {
30+
; CHECK-NEXT: .reg .b32 %r<3>;
31+
; CHECK-EMPTY:
32+
; CHECK-NEXT: // %bb.0:
33+
; CHECK-NEXT: ld.param.u32 %r1, [myctlz_2_param_0];
34+
; CHECK-NEXT: clz.b32 %r2, %r1;
35+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
36+
; CHECK-NEXT: ret;
2837
%val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
2938
ret i32 %val
3039
}
3140

3241
; PTX's clz.b64 returns a 32-bit value, but LLVM's intrinsic returns a 64-bit
3342
; value, so here we have to zero-extend it.
34-
; CHECK-LABEL: myctlz64(
3543
; Test: llvm.ctlz.i64 called with its i1 flag set to false and the result
; returned as i64. The expected PTX writes the clz.b64 result to a 32-bit
; register and widens it with cvt.u64.u32 before the 64-bit return store.
define i64 @myctlz64(i64 %a) {
36-
; CHECK: ld.param.
37-
; CHECK-NEXT: clz.b64
38-
; CHECK-NEXT: cvt.u64.u32
39-
; CHECK-NEXT: st.param.
40-
; CHECK-NEXT: ret;
44+
; CHECK-LABEL: myctlz64(
45+
; CHECK: {
46+
; CHECK-NEXT: .reg .b32 %r<2>;
47+
; CHECK-NEXT: .reg .b64 %rd<3>;
48+
; CHECK-EMPTY:
49+
; CHECK-NEXT: // %bb.0:
50+
; CHECK-NEXT: ld.param.u64 %rd1, [myctlz64_param_0];
51+
; CHECK-NEXT: clz.b64 %r1, %rd1;
52+
; CHECK-NEXT: cvt.u64.u32 %rd2, %r1;
53+
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
54+
; CHECK-NEXT: ret;
4155
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
4256
ret i64 %val
4357
}
44-
; CHECK-LABEL: myctlz64_2(
4558
; Test: llvm.ctlz.i64 called with its i1 flag set to true and the result
; returned as i64. The expected PTX writes the clz.b64 result to a 32-bit
; register and widens it with cvt.u64.u32 before the 64-bit return store.
define i64 @myctlz64_2(i64 %a) {
46-
; CHECK: ld.param.
47-
; CHECK-NEXT: clz.b64
48-
; CHECK-NEXT: cvt.u64.u32
49-
; CHECK-NEXT: st.param.
50-
; CHECK-NEXT: ret;
59+
; CHECK-LABEL: myctlz64_2(
60+
; CHECK: {
61+
; CHECK-NEXT: .reg .b32 %r<2>;
62+
; CHECK-NEXT: .reg .b64 %rd<3>;
63+
; CHECK-EMPTY:
64+
; CHECK-NEXT: // %bb.0:
65+
; CHECK-NEXT: ld.param.u64 %rd1, [myctlz64_2_param_0];
66+
; CHECK-NEXT: clz.b64 %r1, %rd1;
67+
; CHECK-NEXT: cvt.u64.u32 %rd2, %r1;
68+
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
69+
; CHECK-NEXT: ret;
5170
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
5271
ret i64 %val
5372
}
5473

5574
; Here we truncate the 64-bit value of LLVM's ctlz intrinsic to 32 bits, the
5675
; natural return width of ptx's clz.b64 instruction. No conversions should be
5776
; necessary in the PTX.
58-
; CHECK-LABEL: myctlz64_as_32(
5977
; Test: llvm.ctlz.i64 called with its i1 flag set to false, with the result
; truncated to i32 before being returned. The expected PTX stores the 32-bit
; register written by clz.b64 directly, with no cvt instruction in between.
define i32 @myctlz64_as_32(i64 %a) {
60-
; CHECK: ld.param.
61-
; CHECK-NEXT: clz.b64
62-
; CHECK-NEXT: st.param.
63-
; CHECK-NEXT: ret;
78+
; CHECK-LABEL: myctlz64_as_32(
79+
; CHECK: {
80+
; CHECK-NEXT: .reg .b32 %r<2>;
81+
; CHECK-NEXT: .reg .b64 %rd<2>;
82+
; CHECK-EMPTY:
83+
; CHECK-NEXT: // %bb.0:
84+
; CHECK-NEXT: ld.param.u64 %rd1, [myctlz64_as_32_param_0];
85+
; CHECK-NEXT: clz.b64 %r1, %rd1;
86+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
87+
; CHECK-NEXT: ret;
6488
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
6589
%trunc = trunc i64 %val to i32
6690
ret i32 %trunc
6791
}
68-
; CHECK-LABEL: myctlz64_as_32_2(
6992
define i32 @myctlz64_as_32_2(i64 %a) {
70-
; CHECK: ld.param.
71-
; CHECK-NEXT: clz.b64
72-
; CHECK-NEXT: st.param.
73-
; CHECK-NEXT: ret;
93+
; CHECK-LABEL: myctlz64_as_32_2(
94+
; CHECK: {
95+
; CHECK-NEXT: .reg .b32 %r<2>;
96+
; CHECK-NEXT: .reg .b64 %rd<2>;
97+
; CHECK-EMPTY:
98+
; CHECK-NEXT: // %bb.0:
99+
; CHECK-NEXT: ld.param.u64 %rd1, [myctlz64_as_32_2_param_0];
100+
; CHECK-NEXT: clz.b64 %r1, %rd1;
101+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
102+
; CHECK-NEXT: ret;
74103
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
75104
%trunc = trunc i64 %val to i32
76105
ret i32 %trunc
@@ -80,53 +109,77 @@ define i32 @myctlz64_as_32_2(i64 %a) {
80109
; and then truncating the result back down to i16. But the NVPTX ABI
81110
; zero-extends i16 return values to i32, so the final truncation doesn't appear
82111
; in this function.
83-
; CHECK-LABEL: myctlz_ret16(
84112
; Test: llvm.ctlz.i16 called with its i1 flag set to false and the result
; returned as i16. The expected PTX converts the input with cvt.u32.u16, runs
; clz.b32, subtracts 16 with sub.s32, and stores the 32-bit result as the
; return value.
define i16 @myctlz_ret16(i16 %a) {
85-
; CHECK: ld.param.
86-
; CHECK-NEXT: cvt.u32.u16
87-
; CHECK-NEXT: clz.b32
88-
; CHECK-NEXT: sub.
89-
; CHECK-NEXT: st.param.
90-
; CHECK-NEXT: ret;
113+
; CHECK-LABEL: myctlz_ret16(
114+
; CHECK: {
115+
; CHECK-NEXT: .reg .b16 %rs<2>;
116+
; CHECK-NEXT: .reg .b32 %r<4>;
117+
; CHECK-EMPTY:
118+
; CHECK-NEXT: // %bb.0:
119+
; CHECK-NEXT: ld.param.u16 %rs1, [myctlz_ret16_param_0];
120+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
121+
; CHECK-NEXT: clz.b32 %r2, %r1;
122+
; CHECK-NEXT: sub.s32 %r3, %r2, 16;
123+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
124+
; CHECK-NEXT: ret;
91125
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
92126
ret i16 %val
93127
}
94-
; CHECK-LABEL: myctlz_ret16_2(
95128
; Test: llvm.ctlz.i16 called with its i1 flag set to true and the result
; returned as i16. The expected PTX converts the input with cvt.u32.u16, runs
; clz.b32, subtracts 16 with sub.s32, and stores the 32-bit result as the
; return value.
define i16 @myctlz_ret16_2(i16 %a) {
96-
; CHECK: ld.param.
97-
; CHECK-NEXT: cvt.u32.u16
98-
; CHECK-NEXT: clz.b32
99-
; CHECK-NEXT: sub.
100-
; CHECK-NEXT: st.param.
101-
; CHECK-NEXT: ret;
129+
; CHECK-LABEL: myctlz_ret16_2(
130+
; CHECK: {
131+
; CHECK-NEXT: .reg .b16 %rs<2>;
132+
; CHECK-NEXT: .reg .b32 %r<4>;
133+
; CHECK-EMPTY:
134+
; CHECK-NEXT: // %bb.0:
135+
; CHECK-NEXT: ld.param.u16 %rs1, [myctlz_ret16_2_param_0];
136+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
137+
; CHECK-NEXT: clz.b32 %r2, %r1;
138+
; CHECK-NEXT: sub.s32 %r3, %r2, 16;
139+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
140+
; CHECK-NEXT: ret;
102141
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
103142
ret i16 %val
104143
}
105144

106145
; Here we store the result of llvm.ctlz.i16 through a pointer as an i16, so the trunc should
107146
; remain.
108-
; CHECK-LABEL: myctlz_store16(
109147
; Test: llvm.ctlz.i16 called with its i1 flag set to false, with the i16
; result stored through %b instead of being returned. The expected PTX
; converts the clz.b32 result back to 16 bits with cvt.u16.u32, subtracts 16
; with sub.s16, and writes it with st.u16.
define void @myctlz_store16(i16 %a, ptr %b) {
110-
; CHECK: ld.param.
111-
; CHECK-NEXT: cvt.u32.u16
112-
; CHECK-NEXT: clz.b32
113-
; CHECK-DAG: cvt.u16.u32
114-
; CHECK-DAG: sub.
115-
; CHECK: st.{{[a-z]}}16
116-
; CHECK: ret;
148+
; CHECK-LABEL: myctlz_store16(
149+
; CHECK: {
150+
; CHECK-NEXT: .reg .b16 %rs<4>;
151+
; CHECK-NEXT: .reg .b32 %r<3>;
152+
; CHECK-NEXT: .reg .b64 %rd<2>;
153+
; CHECK-EMPTY:
154+
; CHECK-NEXT: // %bb.0:
155+
; CHECK-NEXT: ld.param.u16 %rs1, [myctlz_store16_param_0];
156+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
157+
; CHECK-NEXT: clz.b32 %r2, %r1;
158+
; CHECK-NEXT: cvt.u16.u32 %rs2, %r2;
159+
; CHECK-NEXT: sub.s16 %rs3, %rs2, 16;
160+
; CHECK-NEXT: ld.param.u64 %rd1, [myctlz_store16_param_1];
161+
; CHECK-NEXT: st.u16 [%rd1], %rs3;
162+
; CHECK-NEXT: ret;
117163
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
118164
store i16 %val, ptr %b
119165
ret void
120166
}
121-
; CHECK-LABEL: myctlz_store16_2(
122167
define void @myctlz_store16_2(i16 %a, ptr %b) {
123-
; CHECK: ld.param.
124-
; CHECK-NEXT: cvt.u32.u16
125-
; CHECK-NEXT: clz.b32
126-
; CHECK-DAG: cvt.u16.u32
127-
; CHECK-DAG: sub.
128-
; CHECK: st.{{[a-z]}}16
129-
; CHECK: ret;
168+
; CHECK-LABEL: myctlz_store16_2(
169+
; CHECK: {
170+
; CHECK-NEXT: .reg .b16 %rs<4>;
171+
; CHECK-NEXT: .reg .b32 %r<3>;
172+
; CHECK-NEXT: .reg .b64 %rd<2>;
173+
; CHECK-EMPTY:
174+
; CHECK-NEXT: // %bb.0:
175+
; CHECK-NEXT: ld.param.u16 %rs1, [myctlz_store16_2_param_0];
176+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
177+
; CHECK-NEXT: clz.b32 %r2, %r1;
178+
; CHECK-NEXT: cvt.u16.u32 %rs2, %r2;
179+
; CHECK-NEXT: sub.s16 %rs3, %rs2, 16;
180+
; CHECK-NEXT: ld.param.u64 %rd1, [myctlz_store16_2_param_1];
181+
; CHECK-NEXT: st.u16 [%rd1], %rs3;
182+
; CHECK-NEXT: ret;
130183
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
131184
store i16 %val, ptr %b
132185
ret void

0 commit comments

Comments
 (0)