Skip to content

Commit 31f0ab0

Browse files
authored
[LoongArch] Enable vector tests for 32-bit target (#152094)
1 parent 80f4183 commit 31f0ab0

File tree

514 files changed

+5396
-2304
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

514 files changed

+5396
-2304
lines changed

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
114114
unsigned SplatBitSize;
115115
bool HasAnyUndefs;
116116
unsigned Op;
117-
EVT ViaVecTy;
117+
EVT ResTy = BVN->getValueType(0);
118118
bool Is128Vec = BVN->getValueType(0).is128BitVector();
119119
bool Is256Vec = BVN->getValueType(0).is256BitVector();
120120

@@ -129,28 +129,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
129129
break;
130130
case 8:
131131
Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
132-
ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
133132
break;
134133
case 16:
135134
Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
136-
ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
137135
break;
138136
case 32:
139137
Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
140-
ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
141138
break;
142139
case 64:
143140
Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
144-
ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
145141
break;
146142
}
147143

148144
SDNode *Res;
149145
// If we have a signed 10 bit integer, we can splat it directly.
150146
if (SplatValue.isSignedIntN(10)) {
151-
SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
152-
ViaVecTy.getVectorElementType());
153-
Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
147+
EVT EleType = ResTy.getVectorElementType();
148+
APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
149+
SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
150+
Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
154151
ReplaceNode(Node, Res);
155152
return;
156153
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 77 additions & 51 deletions
Large diffs are not rendered by default.

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def SDT_LoongArchVShuf : SDTypeProfile<1, 3, [SDTCisVec<0>,
2222
def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
2323
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
2424
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
25-
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
25+
SDTCisSameAs<0,1>, SDTCisVT<2, GRLenVT>]>;
2626
def SDT_LoongArchV2RUimm
2727
: SDTypeProfile<1, 3,
2828
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,

llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll

Lines changed: 79 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,31 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3+
; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
34

4-
; TODO: Load a element and splat it to a vector could be lowerd to xvldrepl
55

6-
; A load has more than one user shouldn't be lowered to xvldrepl
76
define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
8-
; CHECK-LABEL: should_not_be_optimized:
9-
; CHECK: # %bb.0:
10-
; CHECK-NEXT: ld.d $a0, $a0, 0
11-
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
12-
; CHECK-NEXT: st.d $a0, $a1, 0
13-
; CHECK-NEXT: ret
7+
; LA32-LABEL: should_not_be_optimized:
8+
; LA32: # %bb.0:
9+
; LA32-NEXT: ld.w $a2, $a0, 0
10+
; LA32-NEXT: ld.w $a0, $a0, 4
11+
; LA32-NEXT: st.w $a2, $a1, 0
12+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 0
13+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
14+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 2
15+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
16+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 4
17+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
18+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 6
19+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
20+
; LA32-NEXT: st.w $a0, $a1, 4
21+
; LA32-NEXT: ret
22+
;
23+
; LA64-LABEL: should_not_be_optimized:
24+
; LA64: # %bb.0:
25+
; LA64-NEXT: ld.d $a0, $a0, 0
26+
; LA64-NEXT: xvreplgr2vr.d $xr0, $a0
27+
; LA64-NEXT: st.d $a0, $a1, 0
28+
; LA64-NEXT: ret
1429
%tmp = load i64, ptr %ptr
1530
store i64 %tmp, ptr %dst
1631
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
@@ -19,11 +34,25 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
1934
}
2035

2136
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
22-
; CHECK-LABEL: xvldrepl_d_unaligned_offset:
23-
; CHECK: # %bb.0:
24-
; CHECK-NEXT: addi.d $a0, $a0, 4
25-
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
26-
; CHECK-NEXT: ret
37+
; LA32-LABEL: xvldrepl_d_unaligned_offset:
38+
; LA32: # %bb.0:
39+
; LA32-NEXT: ld.w $a1, $a0, 4
40+
; LA32-NEXT: ld.w $a0, $a0, 8
41+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0
42+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
43+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2
44+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
45+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4
46+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
47+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6
48+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
49+
; LA32-NEXT: ret
50+
;
51+
; LA64-LABEL: xvldrepl_d_unaligned_offset:
52+
; LA64: # %bb.0:
53+
; LA64-NEXT: addi.d $a0, $a0, 4
54+
; LA64-NEXT: xvldrepl.d $xr0, $a0, 0
55+
; LA64-NEXT: ret
2756
%p = getelementptr i32, ptr %ptr, i32 1
2857
%tmp = load i64, ptr %p
2958
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
@@ -103,21 +132,49 @@ define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
103132

104133

105134
define <4 x i64> @xvldrepl_d(ptr %ptr) {
106-
; CHECK-LABEL: xvldrepl_d:
107-
; CHECK: # %bb.0:
108-
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
109-
; CHECK-NEXT: ret
135+
; LA32-LABEL: xvldrepl_d:
136+
; LA32: # %bb.0:
137+
; LA32-NEXT: ld.w $a1, $a0, 0
138+
; LA32-NEXT: ld.w $a0, $a0, 4
139+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0
140+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
141+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2
142+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
143+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4
144+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
145+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6
146+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
147+
; LA32-NEXT: ret
148+
;
149+
; LA64-LABEL: xvldrepl_d:
150+
; LA64: # %bb.0:
151+
; LA64-NEXT: xvldrepl.d $xr0, $a0, 0
152+
; LA64-NEXT: ret
110153
%tmp = load i64, ptr %ptr
111154
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
112155
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
113156
ret <4 x i64> %tmp2
114157
}
115158

116159
define <4 x i64> @xvldrepl_d_offset(ptr %ptr) {
117-
; CHECK-LABEL: xvldrepl_d_offset:
118-
; CHECK: # %bb.0:
119-
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264
120-
; CHECK-NEXT: ret
160+
; LA32-LABEL: xvldrepl_d_offset:
161+
; LA32: # %bb.0:
162+
; LA32-NEXT: ld.w $a1, $a0, 264
163+
; LA32-NEXT: ld.w $a0, $a0, 268
164+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0
165+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1
166+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2
167+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3
168+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4
169+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5
170+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6
171+
; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7
172+
; LA32-NEXT: ret
173+
;
174+
; LA64-LABEL: xvldrepl_d_offset:
175+
; LA64: # %bb.0:
176+
; LA64-NEXT: xvldrepl.d $xr0, $a0, 264
177+
; LA64-NEXT: ret
121178
%p = getelementptr i64, ptr %ptr, i64 33
122179
%tmp = load i64, ptr %p
123180
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0

llvm/test/CodeGen/LoongArch/lasx/bswap.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
23
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
34

45
define void @bswap_v16i16(ptr %src, ptr %dst) nounwind {

0 commit comments

Comments
 (0)