Skip to content

Commit 22c519a

Browse files
Initial test
1 parent f6641e2 commit 22c519a

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
4+
5+
; Every function in this file loads a <2 x i8> vector through an addrspace(1)
; pointer (plain load, ldg intrinsic, or ldu intrinsic), widens or reinterprets
; it, and stores the result. The assertions pin the PTX that llc emits for each.
; The second invocation above additionally feeds the output to ptxas when that
; tool is available.
target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64-a:8:8"
6+
target triple = "nvptx64-nvidia-cuda"
7+
8+
; t1: load a <2 x i8> from a null addrspace(1) pointer (align 4) and pad it to
; <4 x i8>; shuffle indices 2 and 3 select from the zeroinitializer operand, so
; the two upper lanes are zero. The four bytes are then reinterpreted as an
; i32, placed in lane 0 of an otherwise-zero <4 x i32>, and stored back through
; the null addrspace(1) pointer (align 16).
; The assertions expect a single v2.b8 global load, two prmt.b32 operations
; that assemble the packed 32-bit word, and one v4.b32 global store whose other
; three elements are the immediate 0.
define void @t1() {
9+
; CHECK-LABEL: t1(
10+
; CHECK: {
11+
; CHECK-NEXT: .reg .b16 %rs<3>;
12+
; CHECK-NEXT: .reg .b32 %r<5>;
13+
; CHECK-NEXT: .reg .b64 %rd<2>;
14+
; CHECK-EMPTY:
15+
; CHECK-NEXT: // %bb.0: // %entry
16+
; CHECK-NEXT: mov.b64 %rd1, 0;
17+
; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
18+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
19+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
20+
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
21+
; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
22+
; CHECK-NEXT: st.global.v4.b32 [%rd1], {%r4, 0, 0, 0};
23+
; CHECK-NEXT: ret;
24+
entry:
25+
%0 = load <2 x i8>, ptr addrspace(1) null, align 4
26+
%1 = shufflevector <2 x i8> %0, <2 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
27+
%2 = bitcast <4 x i8> %1 to i32
28+
%3 = insertelement <4 x i32> zeroinitializer, i32 %2, i64 0
29+
store <4 x i32> %3, ptr addrspace(1) null, align 16
30+
ret void
31+
}
32+
33+
; t2: same <2 x i8> load and zero-padding shuffle as a 4-lane byte vector, but
; the load is marked align 8 and the <4 x i8> result is stored directly through
; a null addrspace(5) pointer instead of being bitcast first.
; The assertions expect the same load/prmt sequence followed by a single
; 32-bit st.local of the packed word.
define void @t2() {
34+
; CHECK-LABEL: t2(
35+
; CHECK: {
36+
; CHECK-NEXT: .reg .b16 %rs<3>;
37+
; CHECK-NEXT: .reg .b32 %r<5>;
38+
; CHECK-NEXT: .reg .b64 %rd<2>;
39+
; CHECK-EMPTY:
40+
; CHECK-NEXT: // %bb.0: // %entry
41+
; CHECK-NEXT: mov.b64 %rd1, 0;
42+
; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
43+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
44+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
45+
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
46+
; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
47+
; CHECK-NEXT: st.local.b32 [%rd1], %r4;
48+
; CHECK-NEXT: ret;
49+
entry:
50+
%0 = load <2 x i8>, ptr addrspace(1) null, align 8
51+
%1 = shufflevector <2 x i8> %0, <2 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
52+
store <4 x i8> %1, ptr addrspace(5) null, align 8
53+
ret void
54+
}
55+
56+
; ldg: same widening pattern as the plain-load tests, but the <2 x i8> value
; comes from the llvm.nvvm.ldg.global.i intrinsic applied to the %ptr argument
; (second operand 2 is the declared %align parameter). The padded <4 x i8> is
; stored through a null addrspace(5) pointer.
; ldg is the non-coherent global load, so the expected load instruction carries
; the .nc qualifier; that is what distinguishes this test from an ordinary
; addrspace(1) load.
declare <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p1(ptr addrspace(1) %ptr, i32 %align)
57+
58+
define void @ldg(ptr addrspace(1) %ptr) {
59+
; CHECK-LABEL: ldg(
60+
; CHECK: {
61+
; CHECK-NEXT: .reg .b16 %rs<3>;
62+
; CHECK-NEXT: .reg .b32 %r<5>;
63+
; CHECK-NEXT: .reg .b64 %rd<3>;
64+
; CHECK-EMPTY:
65+
; CHECK-NEXT: // %bb.0: // %entry
66+
; CHECK-NEXT: ld.param.b64 %rd1, [ldg_param_0];
67+
; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd1];
68+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
69+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
70+
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
71+
; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
72+
; CHECK-NEXT: mov.b64 %rd2, 0;
73+
; CHECK-NEXT: st.local.b32 [%rd2], %r4;
74+
; CHECK-NEXT: ret;
75+
entry:
76+
%0 = tail call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p1(ptr addrspace(1) %ptr, i32 2)
77+
%1 = shufflevector <2 x i8> %0, <2 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
78+
store <4 x i8> %1, ptr addrspace(5) null, align 8
79+
ret void
80+
}
81+
82+
; ldu: same widening pattern, with the <2 x i8> value produced by the
; llvm.nvvm.ldu.global.i intrinsic applied to the %ptr argument (second operand
; 2 is the declared %align parameter). The padded <4 x i8> is stored through a
; null addrspace(5) pointer, and the assertions expect an ldu.global.v2.b8 load.
; The declaration uses the integer (.i) variant: the element type is i8 and the
; name must be identical to the callee used in the function body.
declare <2 x i8> @llvm.nvvm.ldu.global.i.v2i8.p1(ptr addrspace(1) %ptr, i32 %align)
83+
84+
define void @ldu(ptr addrspace(1) %ptr) {
85+
; CHECK-LABEL: ldu(
86+
; CHECK: {
87+
; CHECK-NEXT: .reg .b16 %rs<3>;
88+
; CHECK-NEXT: .reg .b32 %r<5>;
89+
; CHECK-NEXT: .reg .b64 %rd<3>;
90+
; CHECK-EMPTY:
91+
; CHECK-NEXT: // %bb.0: // %entry
92+
; CHECK-NEXT: ld.param.b64 %rd1, [ldu_param_0];
93+
; CHECK-NEXT: ldu.global.v2.b8 {%rs1, %rs2}, [%rd1];
94+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
95+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
96+
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
97+
; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
98+
; CHECK-NEXT: mov.b64 %rd2, 0;
99+
; CHECK-NEXT: st.local.b32 [%rd2], %r4;
100+
; CHECK-NEXT: ret;
101+
entry:
102+
%0 = tail call <2 x i8> @llvm.nvvm.ldu.global.i.v2i8.p1(ptr addrspace(1) %ptr, i32 2)
103+
%1 = shufflevector <2 x i8> %0, <2 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
104+
store <4 x i8> %1, ptr addrspace(5) null, align 8
105+
ret void
106+
}
107+
108+
; t3: load a <2 x i8> from a null addrspace(1) pointer (align 2), reinterpret
; the two bytes as an i16, insert that into lane 0 of an otherwise-zero
; <4 x i16>, and store the vector back through the null addrspace(1) pointer
; (align 8).
; The function has no labelled entry block, so its values are numbered from %1
; and the expected basic-block comment has no block name after it.
; The assertions expect the same load/prmt sequence as the other tests,
; followed by one v2.b32 global store whose second element is the immediate 0.
define void @t3() {
109+
; CHECK-LABEL: t3(
110+
; CHECK: {
111+
; CHECK-NEXT: .reg .b16 %rs<3>;
112+
; CHECK-NEXT: .reg .b32 %r<5>;
113+
; CHECK-NEXT: .reg .b64 %rd<2>;
114+
; CHECK-EMPTY:
115+
; CHECK-NEXT: // %bb.0:
116+
; CHECK-NEXT: mov.b64 %rd1, 0;
117+
; CHECK-NEXT: ld.global.v2.b8 {%rs1, %rs2}, [%rd1];
118+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
119+
; CHECK-NEXT: cvt.u32.u16 %r2, %rs1;
120+
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
121+
; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x5410U;
122+
; CHECK-NEXT: st.global.v2.b32 [%rd1], {%r4, 0};
123+
; CHECK-NEXT: ret;
124+
%1 = load <2 x i8>, ptr addrspace(1) null, align 2
125+
%insval2 = bitcast <2 x i8> %1 to i16
126+
%2 = insertelement <4 x i16> zeroinitializer, i16 %insval2, i32 0
127+
store <4 x i16> %2, ptr addrspace(1) null, align 8
128+
ret void
129+
}

0 commit comments

Comments
 (0)