1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
23; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
34; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,62 +10,142 @@ target triple = "nvptx-unknown-cuda"
910declare { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 , i32 )
1011declare i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ))
1112
12- ; SM20-LABEL: .entry foo
13- ; SM30-LABEL: .entry foo
; foo: the texture handle arrives as the i64 argument %img and is passed
; unchanged to @llvm.nvvm.tex.unified.1d.v4f32.s32 together with %idx.
; The SM20-NEXT and SM30-NEXT check sequences for this function are the same
; apart from their prefix: both expect the value loaded from [foo_param_0]
; to be the register operand of the tex instruction.
1413define void @foo (i64 %img , ptr %red , i32 %idx ) {
15- ; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
16- ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
17- ; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
18- ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
14+ ; SM20-LABEL: foo(
15+ ; SM20: {
16+ ; SM20-NEXT: .reg .b32 %r<2>;
17+ ; SM20-NEXT: .reg .f32 %f<5>;
18+ ; SM20-NEXT: .reg .b64 %rd<4>;
19+ ; SM20-EMPTY:
20+ ; SM20-NEXT: // %bb.0:
21+ ; SM20-NEXT: ld.param.u64 %rd1, [foo_param_0];
22+ ; SM20-NEXT: ld.param.u64 %rd2, [foo_param_1];
23+ ; SM20-NEXT: cvta.to.global.u64 %rd3, %rd2;
24+ ; SM20-NEXT: ld.param.u32 %r1, [foo_param_2];
25+ ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
26+ ; SM20-NEXT: st.global.f32 [%rd3], %f1;
27+ ; SM20-NEXT: ret;
28+ ;
29+ ; SM30-LABEL: foo(
30+ ; SM30: {
31+ ; SM30-NEXT: .reg .b32 %r<2>;
32+ ; SM30-NEXT: .reg .f32 %f<5>;
33+ ; SM30-NEXT: .reg .b64 %rd<4>;
34+ ; SM30-EMPTY:
35+ ; SM30-NEXT: // %bb.0:
36+ ; SM30-NEXT: ld.param.u64 %rd1, [foo_param_0];
37+ ; SM30-NEXT: ld.param.u64 %rd2, [foo_param_1];
38+ ; SM30-NEXT: cvta.to.global.u64 %rd3, %rd2;
39+ ; SM30-NEXT: ld.param.u32 %r1, [foo_param_2];
40+ ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
41+ ; SM30-NEXT: st.global.f32 [%rd3], %f1;
42+ ; SM30-NEXT: ret;
1943 %val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %img , i32 %idx )
; Only element 0 of the four-float result is extracted and stored through %red.
2044 %ret = extractvalue { float , float , float , float } %val , 0
21- ; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
22- ; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
2345 store float %ret , ptr %red
2446 ret void
2547}
2648
2749
2850@tex0 = internal addrspace (1 ) global i64 0 , align 8
2951
30- ; SM20-LABEL: .entry bar
31- ; SM30-LABEL: .entry bar
; bar: the texture handle is not an argument; it is produced by calling
; @llvm.nvvm.texsurf.handle.internal.p1 on the internal global @tex0.
; The two prefixes expect different code for that handle:
;   SM20-NEXT: the tex instruction names the symbol directly ([tex0, {%r1}])
;              and no mov of tex0 is expected.
;   SM30-NEXT: `mov.u64 %rd3, tex0` is expected first, and the tex
;              instruction then reads the handle from [%rd3, {%r1}].
3252define void @bar (ptr %red , i32 %idx ) {
33- ; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
53+ ; SM20-LABEL: bar(
54+ ; SM20: {
55+ ; SM20-NEXT: .reg .b32 %r<2>;
56+ ; SM20-NEXT: .reg .f32 %f<5>;
57+ ; SM20-NEXT: .reg .b64 %rd<4>;
58+ ; SM20-EMPTY:
59+ ; SM20-NEXT: // %bb.0:
60+ ; SM20-NEXT: ld.param.u64 %rd1, [bar_param_0];
61+ ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
62+ ; SM20-NEXT: ld.param.u32 %r1, [bar_param_1];
63+ ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
64+ ; SM20-NEXT: st.global.f32 [%rd2], %f1;
65+ ; SM20-NEXT: ret;
66+ ;
67+ ; SM30-LABEL: bar(
68+ ; SM30: {
69+ ; SM30-NEXT: .reg .b32 %r<2>;
70+ ; SM30-NEXT: .reg .f32 %f<5>;
71+ ; SM30-NEXT: .reg .b64 %rd<4>;
72+ ; SM30-EMPTY:
73+ ; SM30-NEXT: // %bb.0:
74+ ; SM30-NEXT: ld.param.u64 %rd1, [bar_param_0];
75+ ; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
76+ ; SM30-NEXT: ld.param.u32 %r1, [bar_param_1];
77+ ; SM30-NEXT: mov.u64 %rd3, tex0;
78+ ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
79+ ; SM30-NEXT: st.global.f32 [%rd2], %f1;
80+ ; SM30-NEXT: ret;
3481 %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ) @tex0 )
35- ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
36- ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
3782 %val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %texHandle , i32 %idx )
; Only element 0 of the four-float result is extracted and stored through %red.
3883 %ret = extractvalue { float , float , float , float } %val , 0
39- ; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
40- ; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
4184 store float %ret , ptr %red
4285 ret void
4386}
4487
4588declare float @texfunc (i64 )
4689
47- ; SM20-LABEL: .entry baz
48- ; SM30-LABEL: .entry baz
; baz: the handle produced from @tex0 is used twice: once as the operand of
; the texture fetch and once as the i64 argument of the external @texfunc.
; The value returned by @texfunc is added to element 0 of the fetch result
; and the sum is stored through %red.
; Expected code per prefix:
;   SM20-NEXT: `mov.u64 %rd3, tex0` is expected and %rd3 is stored to param0
;              for the call, while the tex instruction still names the
;              symbol directly ([tex0, {%r1}]).
;   SM30-NEXT: `mov.u64 %rd3, tex0` is expected and %rd3 serves both as the
;              tex operand ([%rd3, {%r1}]) and as the value stored to param0.
4990define void @baz (ptr %red , i32 %idx ) {
50- ; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
91+ ; SM20-LABEL: baz(
92+ ; SM20: {
93+ ; SM20-NEXT: .reg .b32 %r<2>;
94+ ; SM20-NEXT: .reg .f32 %f<8>;
95+ ; SM20-NEXT: .reg .b64 %rd<4>;
96+ ; SM20-EMPTY:
97+ ; SM20-NEXT: // %bb.0:
98+ ; SM20-NEXT: ld.param.u64 %rd1, [baz_param_0];
99+ ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
100+ ; SM20-NEXT: ld.param.u32 %r1, [baz_param_1];
101+ ; SM20-NEXT: mov.u64 %rd3, tex0;
102+ ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
103+ ; SM20-NEXT: { // callseq 0, 0
104+ ; SM20-NEXT: .param .b64 param0;
105+ ; SM20-NEXT: st.param.b64 [param0], %rd3;
106+ ; SM20-NEXT: .param .b32 retval0;
107+ ; SM20-NEXT: call.uni (retval0),
108+ ; SM20-NEXT: texfunc,
109+ ; SM20-NEXT: (
110+ ; SM20-NEXT: param0
111+ ; SM20-NEXT: );
112+ ; SM20-NEXT: ld.param.f32 %f5, [retval0];
113+ ; SM20-NEXT: } // callseq 0
114+ ; SM20-NEXT: add.rn.f32 %f7, %f1, %f5;
115+ ; SM20-NEXT: st.global.f32 [%rd2], %f7;
116+ ; SM20-NEXT: ret;
117+ ;
118+ ; SM30-LABEL: baz(
119+ ; SM30: {
120+ ; SM30-NEXT: .reg .b32 %r<2>;
121+ ; SM30-NEXT: .reg .f32 %f<8>;
122+ ; SM30-NEXT: .reg .b64 %rd<4>;
123+ ; SM30-EMPTY:
124+ ; SM30-NEXT: // %bb.0:
125+ ; SM30-NEXT: ld.param.u64 %rd1, [baz_param_0];
126+ ; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
127+ ; SM30-NEXT: ld.param.u32 %r1, [baz_param_1];
128+ ; SM30-NEXT: mov.u64 %rd3, tex0;
129+ ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
130+ ; SM30-NEXT: { // callseq 0, 0
131+ ; SM30-NEXT: .param .b64 param0;
132+ ; SM30-NEXT: st.param.b64 [param0], %rd3;
133+ ; SM30-NEXT: .param .b32 retval0;
134+ ; SM30-NEXT: call.uni (retval0),
135+ ; SM30-NEXT: texfunc,
136+ ; SM30-NEXT: (
137+ ; SM30-NEXT: param0
138+ ; SM30-NEXT: );
139+ ; SM30-NEXT: ld.param.f32 %f5, [retval0];
140+ ; SM30-NEXT: } // callseq 0
141+ ; SM30-NEXT: add.rn.f32 %f7, %f1, %f5;
142+ ; SM30-NEXT: st.global.f32 [%rd2], %f7;
143+ ; SM30-NEXT: ret;
51144 %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ) @tex0 )
52- ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
53- ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
54145 %val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %texHandle , i32 %idx )
55146 %ret = extractvalue { float , float , float , float } %val , 0
56- ; SM20: call.uni ([[RETVAL:.*]]),
57- ; SM30: call.uni ([[RETVAL:.*]]),
58- ; SM20: texfunc,
59- ; SM30: texfunc,
; The same %texHandle value that fed the texture fetch is passed to @texfunc.
60147 %texcall = tail call float @texfunc (i64 %texHandle )
61- ; SM20: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
62- ; SM30: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
63- ; SM20: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
64- ; SM30: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
65148 %ret2 = fadd float %ret , %texcall
66- ; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]]
67- ; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]]
68149 store float %ret2 , ptr %red
69150 ret void
70151}
0 commit comments