1- ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64
2- ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64
3- ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify %}
4- ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_80 --nvptx-short-ptr | %ptxas-verify %}
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s -check-prefixes=NOPTRCONV
3+ ; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | FileCheck %s -check-prefixes=PTRCONV
4+ ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify %}
5+ ; RUN: %if ptxas-12.8 %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr | %ptxas-verify %}
56
; Casts a shared::cluster pointer (addrspace 7) to a generic pointer and loads
; an i32 through it. The default run expects a single 64-bit cvta; the
; --nvptx-short-ptr run expects the 32-bit parameter to be widened with
; cvt.u64.u32 before the cvta.
define i32 @conv_shared_cluster_to_generic(ptr addrspace(7) %ptr) {
; NOPTRCONV-LABEL: conv_shared_cluster_to_generic(
; NOPTRCONV:       {
; NOPTRCONV-NEXT:    .reg .b32 %r<2>;
; NOPTRCONV-NEXT:    .reg .b64 %rd<3>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT:  // %bb.0:
; NOPTRCONV-NEXT:    ld.param.u64 %rd1, [conv_shared_cluster_to_generic_param_0];
; NOPTRCONV-NEXT:    cvta.shared::cluster.u64 %rd2, %rd1;
; NOPTRCONV-NEXT:    ld.u32 %r1, [%rd2];
; NOPTRCONV-NEXT:    st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT:    ret;
;
; PTRCONV-LABEL: conv_shared_cluster_to_generic(
; PTRCONV:       {
; PTRCONV-NEXT:    .reg .b32 %r<3>;
; PTRCONV-NEXT:    .reg .b64 %rd<3>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT:  // %bb.0:
; PTRCONV-NEXT:    ld.param.u32 %r1, [conv_shared_cluster_to_generic_param_0];
; PTRCONV-NEXT:    cvt.u64.u32 %rd1, %r1;
; PTRCONV-NEXT:    cvta.shared::cluster.u64 %rd2, %rd1;
; PTRCONV-NEXT:    ld.u32 %r2, [%rd2];
; PTRCONV-NEXT:    st.param.b32 [func_retval0], %r2;
; PTRCONV-NEXT:    ret;
  %genptr = addrspacecast ptr addrspace(7) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; Casts a generic pointer to shared::cluster (addrspace 7) and loads an i32
; through the specific-space pointer. The default run expects a single 64-bit
; cvta.to; the --nvptx-short-ptr run expects the converted address to be
; narrowed with cvt.u32.u64 before the ld.shared::cluster.
define i32 @conv_generic_to_shared_cluster(ptr %ptr) {
; NOPTRCONV-LABEL: conv_generic_to_shared_cluster(
; NOPTRCONV:       {
; NOPTRCONV-NEXT:    .reg .b32 %r<2>;
; NOPTRCONV-NEXT:    .reg .b64 %rd<3>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT:  // %bb.0:
; NOPTRCONV-NEXT:    ld.param.u64 %rd1, [conv_generic_to_shared_cluster_param_0];
; NOPTRCONV-NEXT:    cvta.to.shared::cluster.u64 %rd2, %rd1;
; NOPTRCONV-NEXT:    ld.shared::cluster.u32 %r1, [%rd2];
; NOPTRCONV-NEXT:    st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT:    ret;
;
; PTRCONV-LABEL: conv_generic_to_shared_cluster(
; PTRCONV:       {
; PTRCONV-NEXT:    .reg .b32 %r<3>;
; PTRCONV-NEXT:    .reg .b64 %rd<3>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT:  // %bb.0:
; PTRCONV-NEXT:    ld.param.u64 %rd1, [conv_generic_to_shared_cluster_param_0];
; PTRCONV-NEXT:    cvta.to.shared::cluster.u64 %rd2, %rd1;
; PTRCONV-NEXT:    cvt.u32.u64 %r1, %rd2;
; PTRCONV-NEXT:    ld.shared::cluster.u32 %r2, [%r1];
; PTRCONV-NEXT:    st.param.b32 [func_retval0], %r2;
; PTRCONV-NEXT:    ret;
  %specptr = addrspacecast ptr %ptr to ptr addrspace(7)
  %val = load i32, ptr addrspace(7) %specptr
  ret i32 %val
}
2970
; Casts a shared pointer (addrspace 3) to shared::cluster (addrspace 7) and
; loads an i32 through the result. Both runs expect the cast to be lowered as
; two steps: cvta.shared followed by cvta.to.shared::cluster. The
; --nvptx-short-ptr run additionally expects a cvt.u64.u32 before and a
; cvt.u32.u64 after the pair.
define i32 @conv_shared_to_shared_cluster(ptr addrspace(3) %ptr) {
; NOPTRCONV-LABEL: conv_shared_to_shared_cluster(
; NOPTRCONV:       {
; NOPTRCONV-NEXT:    .reg .b32 %r<2>;
; NOPTRCONV-NEXT:    .reg .b64 %rd<4>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT:  // %bb.0:
; NOPTRCONV-NEXT:    ld.param.u64 %rd1, [conv_shared_to_shared_cluster_param_0];
; NOPTRCONV-NEXT:    cvta.shared.u64 %rd2, %rd1;
; NOPTRCONV-NEXT:    cvta.to.shared::cluster.u64 %rd3, %rd2;
; NOPTRCONV-NEXT:    ld.shared::cluster.u32 %r1, [%rd3];
; NOPTRCONV-NEXT:    st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT:    ret;
;
; PTRCONV-LABEL: conv_shared_to_shared_cluster(
; PTRCONV:       {
; PTRCONV-NEXT:    .reg .b32 %r<4>;
; PTRCONV-NEXT:    .reg .b64 %rd<4>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT:  // %bb.0:
; PTRCONV-NEXT:    ld.param.u32 %r1, [conv_shared_to_shared_cluster_param_0];
; PTRCONV-NEXT:    cvt.u64.u32 %rd1, %r1;
; PTRCONV-NEXT:    cvta.shared.u64 %rd2, %rd1;
; PTRCONV-NEXT:    cvta.to.shared::cluster.u64 %rd3, %rd2;
; PTRCONV-NEXT:    cvt.u32.u64 %r2, %rd3;
; PTRCONV-NEXT:    ld.shared::cluster.u32 %r3, [%r2];
; PTRCONV-NEXT:    st.param.b32 [func_retval0], %r3;
; PTRCONV-NEXT:    ret;
  %specptr = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(7)
  %val = load i32, ptr addrspace(7) %specptr
  ret i32 %val
}
39104
; Casts a shared::cluster pointer (addrspace 7) to shared (addrspace 3) and
; loads an i32 through the result. Both runs expect the cast to be lowered as
; two steps: cvta.shared::cluster followed by cvta.to.shared. The
; --nvptx-short-ptr run additionally expects a cvt.u64.u32 before and a
; cvt.u32.u64 after the pair.
define i32 @conv_shared_cluster_to_shared(ptr addrspace(7) %ptr) {
; NOPTRCONV-LABEL: conv_shared_cluster_to_shared(
; NOPTRCONV:       {
; NOPTRCONV-NEXT:    .reg .b32 %r<2>;
; NOPTRCONV-NEXT:    .reg .b64 %rd<4>;
; NOPTRCONV-EMPTY:
; NOPTRCONV-NEXT:  // %bb.0:
; NOPTRCONV-NEXT:    ld.param.u64 %rd1, [conv_shared_cluster_to_shared_param_0];
; NOPTRCONV-NEXT:    cvta.shared::cluster.u64 %rd2, %rd1;
; NOPTRCONV-NEXT:    cvta.to.shared.u64 %rd3, %rd2;
; NOPTRCONV-NEXT:    ld.shared.u32 %r1, [%rd3];
; NOPTRCONV-NEXT:    st.param.b32 [func_retval0], %r1;
; NOPTRCONV-NEXT:    ret;
;
; PTRCONV-LABEL: conv_shared_cluster_to_shared(
; PTRCONV:       {
; PTRCONV-NEXT:    .reg .b32 %r<4>;
; PTRCONV-NEXT:    .reg .b64 %rd<4>;
; PTRCONV-EMPTY:
; PTRCONV-NEXT:  // %bb.0:
; PTRCONV-NEXT:    ld.param.u32 %r1, [conv_shared_cluster_to_shared_param_0];
; PTRCONV-NEXT:    cvt.u64.u32 %rd1, %r1;
; PTRCONV-NEXT:    cvta.shared::cluster.u64 %rd2, %rd1;
; PTRCONV-NEXT:    cvta.to.shared.u64 %rd3, %rd2;
; PTRCONV-NEXT:    cvt.u32.u64 %r2, %rd3;
; PTRCONV-NEXT:    ld.shared.u32 %r3, [%r2];
; PTRCONV-NEXT:    st.param.b32 [func_retval0], %r3;
; PTRCONV-NEXT:    ret;
  %specptr = addrspacecast ptr addrspace(7) %ptr to ptr addrspace(3)
  %val = load i32, ptr addrspace(3) %specptr
  ret i32 %val
}