Skip to content

Commit f87bcf1

Browse files
authored
[LoongArch] Add patterns for vstelm instructions (llvm#139201)
1 parent d50c85d commit f87bcf1

File tree

8 files changed

+140
-55
lines changed

8 files changed

+140
-55
lines changed

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -300,17 +300,31 @@ def simm5 : Operand<GRLenVT> {
300300
let DecoderMethod = "decodeSImmOperand<5>";
301301
}
302302

303-
def simm8 : Operand<GRLenVT> {
303+
def simm8 : Operand<GRLenVT>,
304+
ImmLeaf<GRLenVT, [{return isInt<8>(Imm);}]> {
304305
let ParserMatchClass = SImmAsmOperand<8>;
305306
let DecoderMethod = "decodeSImmOperand<8>";
306307
}
307308

308-
foreach I = [1, 2, 3] in {
309-
def simm8_lsl # I : Operand<GRLenVT> {
310-
let ParserMatchClass = SImmAsmOperand<8, "lsl" # I>;
311-
let EncoderMethod = "getImmOpValueAsr<" # I # ">";
312-
let DecoderMethod = "decodeSImmOperand<8," # I # ">";
309+
def simm8_lsl1 : Operand<GRLenVT>,
310+
ImmLeaf<GRLenVT, [{return isShiftedInt<8,1>(Imm);}]> {
311+
let ParserMatchClass = SImmAsmOperand<8, "lsl1">;
312+
let EncoderMethod = "getImmOpValueAsr<1>";
313+
let DecoderMethod = "decodeSImmOperand<8, 1>";
314+
}
315+
316+
def simm8_lsl2 : Operand<GRLenVT>,
317+
ImmLeaf<GRLenVT, [{return isShiftedInt<8,2>(Imm);}]> {
318+
let ParserMatchClass = SImmAsmOperand<8, "lsl2">;
319+
let EncoderMethod = "getImmOpValueAsr<2>";
320+
let DecoderMethod = "decodeSImmOperand<8, 2>";
313321
}
322+
323+
def simm8_lsl3 : Operand<GRLenVT>,
324+
ImmLeaf<GRLenVT, [{return isShiftedInt<8,3>(Imm);}]> {
325+
let ParserMatchClass = SImmAsmOperand<8, "lsl3">;
326+
let EncoderMethod = "getImmOpValueAsr<3>";
327+
let DecoderMethod = "decodeSImmOperand<8, 3>";
314328
}
315329

316330
def simm9_lsl3 : Operand<GRLenVT>,

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,6 +1760,14 @@ def : Pat<(lasxsplatf32 FPR32:$fj),
17601760
def : Pat<(lasxsplatf64 FPR64:$fj),
17611761
(XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;
17621762

1763+
// XVSTELM_{B/H/W/D}
1764+
defm : VstelmPat<truncstorei8, v32i8, XVSTELM_B, simm8, uimm5>;
1765+
defm : VstelmPat<truncstorei16, v16i16, XVSTELM_H, simm8_lsl1, uimm4>;
1766+
defm : VstelmPat<truncstorei32, v8i32, XVSTELM_W, simm8_lsl2, uimm3>;
1767+
defm : VstelmPat<store, v4i64, XVSTELM_D, simm8_lsl3, uimm2>;
1768+
defm : VstelmPat<store, v8f32, XVSTELM_W, simm8_lsl2, uimm3, f32>;
1769+
defm : VstelmPat<store, v4f64, XVSTELM_D, simm8_lsl3, uimm2, f64>;
1770+
17631771
// Loads/Stores
17641772
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
17651773
defm : LdPat<load, XVLD, vt>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,6 +1451,20 @@ multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
14511451
(Inst BaseAddr:$rj, ImmOpnd:$imm)>;
14521452
}
14531453

1454+
// Patterns that select "extract one element of a vector and store it" as a
// single Inst, rather than moving the element to a scalar register first.
//   StoreOp - store fragment to match (a plain or truncating store).
//   vt      - vector type of the source operand $vd.
//   Inst    - instruction to emit; operands are (vector, base, offset, index).
//   ImmOpnd - operand class that the address offset $imm must satisfy.
//   IdxOpnd - operand class that the element index $idx must satisfy.
//   elt     - type of the extracted scalar; defaults to i64.
// Three address shapes are matched: a bare BaseAddr (emitted with offset 0),
// an AddrConstant of GPR + $imm, and an AddLike of BaseAddr + $imm.
// NOTE(review): AddrConstant and AddLike are defined outside this view;
// confirm their exact matching rules there.
multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
1455+
Operand ImmOpnd, Operand IdxOpnd, ValueType elt = i64> {
1456+
def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)), BaseAddr:$rj),
1457+
(Inst vt:$vd, BaseAddr:$rj, 0, IdxOpnd:$idx)>;
1458+
1459+
def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)),
1460+
(AddrConstant GPR:$rj, ImmOpnd:$imm)),
1461+
(Inst vt:$vd, GPR:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
1462+
1463+
def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)),
1464+
(AddLike BaseAddr:$rj, ImmOpnd:$imm)),
1465+
(Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
1466+
}
1467+
14541468
let Predicates = [HasExtLSX] in {
14551469

14561470
// VADD_{B/H/W/D}
@@ -1944,6 +1958,13 @@ def : Pat<(lsxsplatf32 FPR32:$fj),
19441958
def : Pat<(lsxsplatf64 FPR64:$fj),
19451959
(VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
19461960

1961+
defm : VstelmPat<truncstorei8, v16i8, VSTELM_B, simm8, uimm4>;
1962+
defm : VstelmPat<truncstorei16, v8i16, VSTELM_H, simm8_lsl1, uimm3>;
1963+
defm : VstelmPat<truncstorei32, v4i32, VSTELM_W, simm8_lsl2, uimm2>;
1964+
defm : VstelmPat<store, v2i64, VSTELM_D, simm8_lsl3, uimm1>;
1965+
defm : VstelmPat<store, v4f32, VSTELM_W, simm8_lsl2, uimm2, f32>;
1966+
defm : VstelmPat<store, v2f64, VSTELM_D, simm8_lsl3, uimm1, f64>;
1967+
19471968
// Loads/Stores
19481969
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
19491970
defm : LdPat<load, VLD, vt>;

llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,44 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
140140

141141
bool FrameRegIsKill = false;
142142

143-
if (!isInt<12>(Offset.getFixed())) {
143+
int FixedOffset = Offset.getFixed();
144+
bool OffsetLegal = true;
145+
146+
// VSTELM/XVSTELM only encode an 8-bit signed offset scaled by the element
// size, so check whether the frame offset is encodable for those opcodes.
147+
switch (MIOpc) {
148+
case LoongArch::VSTELM_B:
149+
case LoongArch::XVSTELM_B:
150+
OffsetLegal = isInt<8>(FixedOffset);
151+
break;
152+
case LoongArch::VSTELM_H:
153+
case LoongArch::XVSTELM_H:
154+
OffsetLegal = isShiftedInt<8, 1>(FixedOffset);
155+
break;
156+
case LoongArch::VSTELM_W:
157+
case LoongArch::XVSTELM_W:
158+
OffsetLegal = isShiftedInt<8, 2>(FixedOffset);
159+
break;
160+
case LoongArch::VSTELM_D:
161+
case LoongArch::XVSTELM_D:
162+
OffsetLegal = isShiftedInt<8, 3>(FixedOffset);
163+
break;
164+
}
165+
166+
if (!OffsetLegal && isInt<12>(FixedOffset)) {
167+
unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
168+
169+
// The offset fits in si12 but is not encodable by this instruction, so
170+
// compute FrameReg + offset into one scratch register and use offset 0.
171+
Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
172+
BuildMI(MBB, II, DL, TII->get(Addi), ScratchReg)
173+
.addReg(FrameReg)
174+
.addImm(FixedOffset);
175+
Offset = StackOffset::getFixed(0);
176+
FrameReg = ScratchReg;
177+
FrameRegIsKill = true;
178+
}
179+
180+
if (!isInt<12>(FixedOffset)) {
144181
unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
145182
unsigned Add = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
146183

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
55
; CHECK-LABEL: extract_32xi8:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: xvld $xr0, $a0, 0
8-
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
9-
; CHECK-NEXT: st.b $a0, $a1, 0
8+
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
109
; CHECK-NEXT: ret
1110
%v = load volatile <32 x i8>, ptr %src
1211
%e = extractelement <32 x i8> %v, i32 1
@@ -18,8 +17,7 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
1817
; CHECK-LABEL: extract_16xi16:
1918
; CHECK: # %bb.0:
2019
; CHECK-NEXT: xvld $xr0, $a0, 0
21-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
22-
; CHECK-NEXT: st.h $a0, $a1, 0
20+
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
2321
; CHECK-NEXT: ret
2422
%v = load volatile <16 x i16>, ptr %src
2523
%e = extractelement <16 x i16> %v, i32 1
@@ -31,8 +29,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
3129
; CHECK-LABEL: extract_8xi32:
3230
; CHECK: # %bb.0:
3331
; CHECK-NEXT: xvld $xr0, $a0, 0
34-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
35-
; CHECK-NEXT: st.w $a0, $a1, 0
32+
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1
3633
; CHECK-NEXT: ret
3734
%v = load volatile <8 x i32>, ptr %src
3835
%e = extractelement <8 x i32> %v, i32 1
@@ -44,8 +41,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
4441
; CHECK-LABEL: extract_4xi64:
4542
; CHECK: # %bb.0:
4643
; CHECK-NEXT: xvld $xr0, $a0, 0
47-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
48-
; CHECK-NEXT: st.d $a0, $a1, 0
44+
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1
4945
; CHECK-NEXT: ret
5046
%v = load volatile <4 x i64>, ptr %src
5147
%e = extractelement <4 x i64> %v, i32 1
@@ -57,9 +53,7 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
5753
; CHECK-LABEL: extract_8xfloat:
5854
; CHECK: # %bb.0:
5955
; CHECK-NEXT: xvld $xr0, $a0, 0
60-
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
61-
; CHECK-NEXT: movgr2fr.w $fa0, $a0
62-
; CHECK-NEXT: fst.s $fa0, $a1, 0
56+
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 7
6357
; CHECK-NEXT: ret
6458
%v = load volatile <8 x float>, ptr %src
6559
%e = extractelement <8 x float> %v, i32 7
@@ -71,9 +65,7 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
7165
; CHECK-LABEL: extract_4xdouble:
7266
; CHECK: # %bb.0:
7367
; CHECK-NEXT: xvld $xr0, $a0, 0
74-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
75-
; CHECK-NEXT: movgr2fr.d $fa0, $a0
76-
; CHECK-NEXT: fst.d $fa0, $a1, 0
68+
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 3
7769
; CHECK-NEXT: ret
7870
%v = load volatile <4 x double>, ptr %src
7971
%e = extractelement <4 x double> %v, i32 3
@@ -230,3 +222,18 @@ define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
230222
store double %e, ptr %dst
231223
ret void
232224
}
225+
226+
define void @eliminate_frame_index(<8 x i32> %a) nounwind {
227+
; CHECK-LABEL: eliminate_frame_index:
228+
; CHECK: # %bb.0:
229+
; CHECK-NEXT: addi.d $sp, $sp, -1040
230+
; CHECK-NEXT: addi.d $a0, $sp, 524
231+
; CHECK-NEXT: xvstelm.w $xr0, $a0, 0, 1
232+
; CHECK-NEXT: addi.d $sp, $sp, 1040
233+
; CHECK-NEXT: ret
234+
%1 = alloca [32 x [8 x i32]]
235+
%2 = getelementptr i8, ptr %1, i64 508
236+
%b = extractelement <8 x i32> %a, i64 1
237+
store i32 %b, ptr %2
238+
ret void
239+
}

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
55
; CHECK-LABEL: extract_16xi8:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: vld $vr0, $a0, 0
8-
; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
9-
; CHECK-NEXT: st.b $a0, $a1, 0
8+
; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 1
109
; CHECK-NEXT: ret
1110
%v = load volatile <16 x i8>, ptr %src
1211
%e = extractelement <16 x i8> %v, i32 1
@@ -18,8 +17,7 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
1817
; CHECK-LABEL: extract_8xi16:
1918
; CHECK: # %bb.0:
2019
; CHECK-NEXT: vld $vr0, $a0, 0
21-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
22-
; CHECK-NEXT: st.h $a0, $a1, 0
20+
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 1
2321
; CHECK-NEXT: ret
2422
%v = load volatile <8 x i16>, ptr %src
2523
%e = extractelement <8 x i16> %v, i32 1
@@ -31,8 +29,7 @@ define void @extract_4xi32(ptr %src, ptr %dst) nounwind {
3129
; CHECK-LABEL: extract_4xi32:
3230
; CHECK: # %bb.0:
3331
; CHECK-NEXT: vld $vr0, $a0, 0
34-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
35-
; CHECK-NEXT: st.w $a0, $a1, 0
32+
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
3633
; CHECK-NEXT: ret
3734
%v = load volatile <4 x i32>, ptr %src
3835
%e = extractelement <4 x i32> %v, i32 1
@@ -44,8 +41,7 @@ define void @extract_2xi64(ptr %src, ptr %dst) nounwind {
4441
; CHECK-LABEL: extract_2xi64:
4542
; CHECK: # %bb.0:
4643
; CHECK-NEXT: vld $vr0, $a0, 0
47-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
48-
; CHECK-NEXT: st.d $a0, $a1, 0
44+
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1
4945
; CHECK-NEXT: ret
5046
%v = load volatile <2 x i64>, ptr %src
5147
%e = extractelement <2 x i64> %v, i32 1
@@ -57,8 +53,7 @@ define void @extract_4xfloat(ptr %src, ptr %dst) nounwind {
5753
; CHECK-LABEL: extract_4xfloat:
5854
; CHECK: # %bb.0:
5955
; CHECK-NEXT: vld $vr0, $a0, 0
60-
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
61-
; CHECK-NEXT: fst.s $fa0, $a1, 0
56+
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
6257
; CHECK-NEXT: ret
6358
%v = load volatile <4 x float>, ptr %src
6459
%e = extractelement <4 x float> %v, i32 1
@@ -70,8 +65,7 @@ define void @extract_2xdouble(ptr %src, ptr %dst) nounwind {
7065
; CHECK-LABEL: extract_2xdouble:
7166
; CHECK: # %bb.0:
7267
; CHECK-NEXT: vld $vr0, $a0, 0
73-
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
74-
; CHECK-NEXT: fst.d $fa0, $a1, 0
68+
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1
7569
; CHECK-NEXT: ret
7670
%v = load volatile <2 x double>, ptr %src
7771
%e = extractelement <2 x double> %v, i32 1
@@ -168,3 +162,18 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
168162
store double %e, ptr %dst
169163
ret void
170164
}
165+
166+
define void @eliminate_frame_index(<4 x i32> %a) nounwind {
167+
; CHECK-LABEL: eliminate_frame_index:
168+
; CHECK: # %bb.0:
169+
; CHECK-NEXT: addi.d $sp, $sp, -1040
170+
; CHECK-NEXT: addi.d $a0, $sp, 524
171+
; CHECK-NEXT: vstelm.w $vr0, $a0, 0, 1
172+
; CHECK-NEXT: addi.d $sp, $sp, 1040
173+
; CHECK-NEXT: ret
174+
%1 = alloca [64 x [4 x i32]]
175+
%2 = getelementptr i8, ptr %1, i64 508
176+
%b = extractelement <4 x i32> %a, i64 1
177+
store i32 %b, ptr %2
178+
ret void
179+
}

llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: vld $vr0, $a0, 0
88
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
9-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
10-
; CHECK-NEXT: st.d $a0, $a1, 0
9+
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
1110
; CHECK-NEXT: ret
1211
%a = load <2 x i64>, ptr %ptr
1312
%trunc = trunc <2 x i64> %a to <2 x i32>
@@ -22,8 +21,7 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
2221
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
2322
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
2423
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
25-
; CHECK-NEXT: vpickve2gr.w $a0, $vr1, 0
26-
; CHECK-NEXT: st.w $a0, $a1, 0
24+
; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
2725
; CHECK-NEXT: ret
2826
%a = load <2 x i64>, ptr %ptr
2927
%trunc = trunc <2 x i64> %a to <2 x i16>
@@ -38,8 +36,7 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
3836
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
3937
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
4038
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
41-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
42-
; CHECK-NEXT: st.h $a0, $a1, 0
39+
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
4340
; CHECK-NEXT: ret
4441
%a = load <2 x i64>, ptr %ptr
4542
%trunc = trunc <2 x i64> %a to <2 x i8>
@@ -52,8 +49,7 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
5249
; CHECK: # %bb.0:
5350
; CHECK-NEXT: vld $vr0, $a0, 0
5451
; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
55-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
56-
; CHECK-NEXT: st.d $a0, $a1, 0
52+
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
5753
; CHECK-NEXT: ret
5854
%a = load <4 x i32>, ptr %ptr
5955
%trunc = trunc <4 x i32> %a to <4 x i16>
@@ -68,8 +64,7 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
6864
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
6965
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
7066
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
71-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
72-
; CHECK-NEXT: st.w $a0, $a1, 0
67+
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
7368
; CHECK-NEXT: ret
7469
%a = load <4 x i32>, ptr %ptr
7570
%trunc = trunc <4 x i32> %a to <4 x i8>
@@ -82,8 +77,7 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
8277
; CHECK: # %bb.0:
8378
; CHECK-NEXT: vld $vr0, $a0, 0
8479
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
85-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
86-
; CHECK-NEXT: st.d $a0, $a1, 0
80+
; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
8781
; CHECK-NEXT: ret
8882
%a = load <8 x i16>, ptr %ptr
8983
%trunc = trunc <8 x i16> %a to <8 x i8>
@@ -97,8 +91,7 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
9791
; CHECK-NEXT: ld.d $a0, $a0, 0
9892
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
9993
; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
100-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
101-
; CHECK-NEXT: st.w $a0, $a1, 0
94+
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
10295
; CHECK-NEXT: ret
10396
%a = load <2 x i32>, ptr %ptr
10497
%trunc = trunc <2 x i32> %a to <2 x i16>
@@ -114,8 +107,7 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
114107
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
115108
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
116109
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
117-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
118-
; CHECK-NEXT: st.h $a0, $a1, 0
110+
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
119111
; CHECK-NEXT: ret
120112
%a = load <2 x i32>, ptr %ptr
121113
%trunc = trunc <2 x i32> %a to <2 x i8>
@@ -129,8 +121,7 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
129121
; CHECK-NEXT: ld.d $a0, $a0, 0
130122
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
131123
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
132-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
133-
; CHECK-NEXT: st.w $a0, $a1, 0
124+
; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
134125
; CHECK-NEXT: ret
135126
%a = load <4 x i16>, ptr %ptr
136127
%trunc = trunc <4 x i16> %a to <4 x i8>
@@ -144,8 +135,7 @@ define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
144135
; CHECK-NEXT: ld.w $a0, $a0, 0
145136
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
146137
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
147-
; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
148-
; CHECK-NEXT: st.h $a0, $a1, 0
138+
; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
149139
; CHECK-NEXT: ret
150140
%a = load <2 x i16>, ptr %ptr
151141
%trunc = trunc <2 x i16> %a to <2 x i8>

0 commit comments

Comments
 (0)