@@ -8,18 +8,13 @@ target triple = "nvptx64-nvidia-cuda"
88define void @t1 () {
99; CHECK-LABEL: t1(
1010; CHECK: {
11- ; CHECK-NEXT: .reg .b16 %rs<3>;
12- ; CHECK-NEXT: .reg .b32 %r<5>;
11+ ; CHECK-NEXT: .reg .b32 %r<2>;
1312; CHECK-NEXT: .reg .b64 %rd<2>;
1413; CHECK-EMPTY:
1514; CHECK-NEXT: // %bb.0: // %entry
1615; CHECK-NEXT: mov.b64 %rd1, 0;
17- ; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
18- ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
19- ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
20- ; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
21- ; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
22- ; CHECK-NEXT: st.global.v4.b32 [%rd1], {%r4, 0, 0, 0};
16+ ; CHECK-NEXT: ld.global.b16 %r1, [%rd1];
17+ ; CHECK-NEXT: st.global.v4.b32 [%rd1], {%r1, 0, 0, 0};
2318; CHECK-NEXT: ret;
2419entry:
2520 %0 = load <2 x i8 >, ptr addrspace (1 ) null , align 4
@@ -33,18 +28,13 @@ entry:
3328define void @t2 () {
3429; CHECK-LABEL: t2(
3530; CHECK: {
36- ; CHECK-NEXT: .reg .b16 %rs<3>;
37- ; CHECK-NEXT: .reg .b32 %r<5>;
31+ ; CHECK-NEXT: .reg .b32 %r<2>;
3832; CHECK-NEXT: .reg .b64 %rd<2>;
3933; CHECK-EMPTY:
4034; CHECK-NEXT: // %bb.0: // %entry
4135; CHECK-NEXT: mov.b64 %rd1, 0;
42- ; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
43- ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
44- ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
45- ; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
46- ; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
47- ; CHECK-NEXT: st.local.b32 [%rd1], %r4;
36+ ; CHECK-NEXT: ld.global.b16 %r1, [%rd1];
37+ ; CHECK-NEXT: st.local.b32 [%rd1], %r1;
4838; CHECK-NEXT: ret;
4939entry:
5040 %0 = load <2 x i8 >, ptr addrspace (1 ) null , align 8
@@ -58,19 +48,14 @@ declare <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p1(ptr addrspace(1) %ptr, i32 %ali
5848define void @ldg (ptr addrspace (1 ) %ptr ) {
5949; CHECK-LABEL: ldg(
6050; CHECK: {
61- ; CHECK-NEXT: .reg .b16 %rs<3>;
62- ; CHECK-NEXT: .reg .b32 %r<5>;
51+ ; CHECK-NEXT: .reg .b32 %r<2>;
6352; CHECK-NEXT: .reg .b64 %rd<3>;
6453; CHECK-EMPTY:
6554; CHECK-NEXT: // %bb.0: // %entry
6655; CHECK-NEXT: ld.param.b64 %rd1, [ldg_param_0];
67- ; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
68- ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
69- ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
70- ; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
71- ; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
56+ ; CHECK-NEXT: ld.global.b16 %r1, [%rd1];
7257; CHECK-NEXT: mov.b64 %rd2, 0;
73- ; CHECK-NEXT: st.local.b32 [%rd2], %r4 ;
58+ ; CHECK-NEXT: st.local.b32 [%rd2], %r1 ;
7459; CHECK-NEXT: ret;
7560entry:
7661 %0 = tail call <2 x i8 > @llvm.nvvm.ldg.global.i.v2i8.p1 (ptr addrspace (1 ) %ptr , i32 2 )
@@ -84,19 +69,16 @@ declare <2 x i8> @llvm.nvvm.ldu.global.f.v2i8.p1(ptr addrspace(1) %ptr, i32 %ali
8469define void @ldu (ptr addrspace (1 ) %ptr ) {
8570; CHECK-LABEL: ldu(
8671; CHECK: {
87- ; CHECK-NEXT: .reg .b16 %rs<3 >;
88- ; CHECK-NEXT: .reg .b32 %r<5 >;
72+ ; CHECK-NEXT: .reg .b16 %rs<2 >;
73+ ; CHECK-NEXT: .reg .b32 %r<2 >;
8974; CHECK-NEXT: .reg .b64 %rd<3>;
9075; CHECK-EMPTY:
9176; CHECK-NEXT: // %bb.0: // %entry
9277; CHECK-NEXT: ld.param.b64 %rd1, [ldu_param_0];
93- ; CHECK-NEXT: ldu.global.v2.b8 {%rs1, %rs2}, [%rd1];
94- ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
95- ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
96- ; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
97- ; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
78+ ; CHECK-NEXT: ldu.global.b16 %rs1, [%rd1];
79+ ; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
9880; CHECK-NEXT: mov.b64 %rd2, 0;
99- ; CHECK-NEXT: st.local.b32 [%rd2], %r4 ;
81+ ; CHECK-NEXT: st.local.b32 [%rd2], %r1 ;
10082; CHECK-NEXT: ret;
10183entry:
10284 %0 = tail call <2 x i8 > @llvm.nvvm.ldu.global.i.v2i8.p1 (ptr addrspace (1 ) %ptr , i32 2 )
@@ -108,18 +90,13 @@ entry:
10890define void @t3 () {
10991; CHECK-LABEL: t3(
11092; CHECK: {
111- ; CHECK-NEXT: .reg .b16 %rs<3>;
112- ; CHECK-NEXT: .reg .b32 %r<5>;
93+ ; CHECK-NEXT: .reg .b32 %r<2>;
11394; CHECK-NEXT: .reg .b64 %rd<2>;
11495; CHECK-EMPTY:
11596; CHECK-NEXT: // %bb.0:
11697; CHECK-NEXT: mov.b64 %rd1, 0;
117- ; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
118- ; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
119- ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
120- ; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
121- ; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
122- ; CHECK-NEXT: st.global.v2.b32 [%rd1], {%r4, 0};
98+ ; CHECK-NEXT: ld.global.b16 %r1, [%rd1];
99+ ; CHECK-NEXT: st.global.v2.b32 [%rd1], {%r1, 0};
123100; CHECK-NEXT: ret;
124101 %1 = load <2 x i8 >, ptr addrspace (1 ) null , align 2
125102 %insval2 = bitcast <2 x i8 > %1 to i16
0 commit comments