
Commit a108881

[LoongArch] Custom lowering for vector logical right shifts of integers (#171097)
After PR #169491, the DAG combiner can still recreate a vector UDIV node with an illegal type even after type legalization; this is the root cause of issue #170976. The optimization introduced in PR #169491 is still desirable, so this patch instead adds custom lowering for vector integer logical right shifts, which keeps the DAG combiner from producing nodes with illegal types.

Fixes #170976.
1 parent 62dbe57 commit a108881
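
For reference, the pattern that exposed the bug is an unsigned division by a constant splat on a vector type that is illegal for LASX (for example <64 x i8>, which type legalization splits into two <32 x i8> halves). A minimal LLVM IR sketch, taken from the regression test added by this commit:

define <64 x i8> @test_i8(<64 x i8> %shuffle) {
entry:
  ; The combiner turns this udiv-by-constant into a multiply-high plus a
  ; logical right shift. Per the commit message, custom-lowering ISD::SRL to
  ; LoongArchISD::VSRLI keeps the generic combiner from recreating a UDIV
  ; node with an illegal type afterwards.
  %div = udiv <64 x i8> %shuffle, splat (i8 3)
  ret <64 x i8> %div
}

With this patch, llc (e.g. --mtriple=loongarch64 --mattr=+lasx) selects xvmuh.bu followed by xvsrli.b for each <32 x i8> half, as the new test further below checks.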

File tree

10 files changed (+251 / -60 lines)

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 45 additions & 2 deletions
@@ -338,7 +338,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                          VT, Legal);
       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
-      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+      setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal);
       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
       setCondCodeAction(
@@ -354,6 +354,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::USUBSAT, VT, Legal);
       setOperationAction(ISD::ROTL, VT, Custom);
       setOperationAction(ISD::ROTR, VT, Custom);
+      setOperationAction(ISD::SRL, VT, Custom);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -427,7 +428,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                          VT, Legal);
       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
-      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+      setOperationAction({ISD::SHL, ISD::SRA}, VT, Legal);
       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
       setCondCodeAction(
@@ -444,6 +445,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
       setOperationAction(ISD::ROTL, VT, Custom);
       setOperationAction(ISD::ROTR, VT, Custom);
+      setOperationAction(ISD::SRL, VT, Custom);
     }
     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -618,10 +620,51 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerVECREDUCE(Op, DAG);
   case ISD::ConstantFP:
     return lowerConstantFP(Op, DAG);
+  case ISD::SRL:
+    return lowerVectorSRL(Op, DAG);
   }
   return SDValue();
 }
 
+/// getVShiftAmt - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftAmt(SDValue Op, unsigned ElementBits, int64_t &Amt) {
+  // Ignore bit_converts.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN ||
+      !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+                            ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false;
+  Amt = SplatBits.getSExtValue();
+  return true;
+}
+
+SDValue LoongArchTargetLowering::lowerVectorSRL(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  int64_t Amt;
+
+  if (!Op.getOperand(1).getValueType().isVector())
+    return Op;
+  unsigned EltSize = VT.getScalarSizeInBits();
+  MVT GRLenVT = Subtarget.getGRLenVT();
+
+  assert(Op.getOpcode() == ISD::SRL && "unexpected shift opcode");
+  if (getVShiftAmt(Op.getOperand(1), EltSize, Amt) && Amt >= 0 && Amt < EltSize)
+    return DAG.getNode(LoongArchISD::VSRLI, DL, VT, Op.getOperand(0),
+                       DAG.getConstant(Amt, DL, GRLenVT));
+  return DAG.getNode(LoongArchISD::VSRL, DL, VT, Op.getOperand(0),
+                     Op.getOperand(1));
+}
+
 // Helper to attempt to return a cheaper, bit-inverted version of \p V.
 static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
   // TODO: don't always ignore oneuse constraints.
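
To make the two code paths in lowerVectorSRL above concrete, here is a hypothetical pair of inputs (not part of this commit's tests): a splat-constant shift amount within the element width takes the LoongArchISD::VSRLI path, while any other vector amount falls through to LoongArchISD::VSRL.

; Hypothetical LLVM IR, for illustration only.
define <16 x i8> @srl_splat_amount(<16 x i8> %v) {
  ; splat-constant amount in range -> LoongArchISD::VSRLI -> vsrli.b
  %r = lshr <16 x i8> %v, splat (i8 3)
  ret <16 x i8> %r
}

define <16 x i8> @srl_variable_amount(<16 x i8> %v, <16 x i8> %amt) {
  ; non-constant per-element amount -> LoongArchISD::VSRL -> vsrl.b
  %r = lshr <16 x i8> %v, %amt
  ret <16 x i8> %r
}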

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
@@ -240,6 +240,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorSRL(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 10 additions & 11 deletions
@@ -1437,9 +1437,8 @@ defm : PatShiftXrSplatUimm<shl, "XVSLLI">;
 defm : PatShiftXrUimm<loongarch_vslli, "XVSLLI">;
 
 // XVSRL[I]_{B/H/W/D}
-defm : PatXrXr<srl, "XVSRL">;
-defm : PatShiftXrXr<srl, "XVSRL">;
-defm : PatShiftXrSplatUimm<srl, "XVSRLI">;
+defm : PatXrXr<loongarch_vsrl, "XVSRL">;
+defm : PatShiftXrXr<loongarch_vsrl, "XVSRL">;
 defm : PatShiftXrUimm<loongarch_vsrli, "XVSRLI">;
 
 // XVSRA[I]_{B/H/W/D}
@@ -2045,18 +2044,18 @@ defm : VAvgPat<sra, "XVAVG_B", v32i8>;
 defm : VAvgPat<sra, "XVAVG_H", v16i16>;
 defm : VAvgPat<sra, "XVAVG_W", v8i32>;
 defm : VAvgPat<sra, "XVAVG_D", v4i64>;
-defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
-defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
-defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
-defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_BU", v32i8>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_HU", v16i16>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_WU", v8i32>;
+defm : VAvgIPat<loongarch_vsrli, "XVAVG_DU", v4i64>;
 defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
 defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
 defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
 defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
-defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
-defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
-defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
-defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_BU", v32i8>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_HU", v16i16>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_WU", v8i32>;
+defm : VAvgrIPat<loongarch_vsrli, "XVAVGR_DU", v4i64>;
 
 // abs
 def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 25 additions & 11 deletions
@@ -72,6 +72,9 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg
 def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
 def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
 
+// Vector logicial right shift
+def loongarch_vsrl : SDNode<"LoongArchISD::VSRL", SDT_LoongArchV2R>;
+
 // Vector logicial left / right shift by immediate
 def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
 def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
@@ -1535,13 +1538,25 @@ multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
             (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
 }
 
+multiclass VAvgIPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+  def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (GRLenVT 1)),
+            (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
 multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
   def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
                              (vt (vsplat_imm_eq_1)))),
                     (vt (vsplat_imm_eq_1))),
             (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
 }
 
+multiclass VAvgrIPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+  def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+                             (vt (vsplat_imm_eq_1)))),
+                    (GRLenVT 1)),
+            (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
 let Predicates = [HasExtLSX] in {
 
 // VADD_{B/H/W/D}
@@ -1647,9 +1662,8 @@ defm : PatShiftVrSplatUimm<shl, "VSLLI">;
 defm : PatShiftVrUimm<loongarch_vslli, "VSLLI">;
 
 // VSRL[I]_{B/H/W/D}
-defm : PatVrVr<srl, "VSRL">;
-defm : PatShiftVrVr<srl, "VSRL">;
-defm : PatShiftVrSplatUimm<srl, "VSRLI">;
+defm : PatVrVr<loongarch_vsrl, "VSRL">;
+defm : PatShiftVrVr<loongarch_vsrl, "VSRL">;
 defm : PatShiftVrUimm<loongarch_vsrli, "VSRLI">;
 
 // VSRA[I]_{B/H/W/D}
@@ -2195,18 +2209,18 @@ defm : VAvgPat<sra, "VAVG_B", v16i8>;
 defm : VAvgPat<sra, "VAVG_H", v8i16>;
 defm : VAvgPat<sra, "VAVG_W", v4i32>;
 defm : VAvgPat<sra, "VAVG_D", v2i64>;
-defm : VAvgPat<srl, "VAVG_BU", v16i8>;
-defm : VAvgPat<srl, "VAVG_HU", v8i16>;
-defm : VAvgPat<srl, "VAVG_WU", v4i32>;
-defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_BU", v16i8>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_HU", v8i16>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_WU", v4i32>;
+defm : VAvgIPat<loongarch_vsrli, "VAVG_DU", v2i64>;
 defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
 defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
 defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
 defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
-defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
-defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
-defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
-defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_BU", v16i8>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_HU", v8i16>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_WU", v4i32>;
+defm : VAvgrIPat<loongarch_vsrli, "VAVGR_DU", v2i64>;
 
 // abs
 def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
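
The VAVG/VAVGR pattern changes above follow from the new lowering: once ISD::SRL is custom-lowered, the unsigned-average idiom reaches instruction selection as loongarch_vsrli with a scalar GRLenVT immediate rather than as a generic srl by a splat-of-1 vector, so the old VAvgPat/VAvgrPat instantiations on srl can no longer match, and the new VAvgIPat/VAvgrIPat multiclasses match the immediate form instead. The idiom itself, sketched as hypothetical LLVM IR (the commit's avg.ll tests perform the same computation on loaded values):

define <2 x i64> @vavg_du_idiom(<2 x i64> %a, <2 x i64> %b) {
  ; unsigned average (a + b) >> 1; expected to still select vavg.du
  %sum = add <2 x i64> %a, %b
  %avg = lshr <2 x i64> %sum, splat (i64 1)
  ret <2 x i64> %avg
}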

llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,7 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
 ; LA32: # %bb.0:
 ; LA32-NEXT: xvslli.b $xr1, $xr0, 4
 ; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvandi.b $xr0, $xr0, 15
 ; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
 ; LA32-NEXT: xvandi.b $xr1, $xr0, 51
 ; LA32-NEXT: xvslli.b $xr1, $xr1, 2
@@ -163,6 +164,7 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 ; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
 ; LA32-NEXT: xvslli.b $xr1, $xr0, 4
 ; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvandi.b $xr0, $xr0, 15
 ; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
 ; LA32-NEXT: xvandi.b $xr1, $xr0, 51
 ; LA32-NEXT: xvslli.b $xr1, $xr1, 2

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll

Lines changed: 9 additions & 18 deletions
@@ -131,22 +131,13 @@ entry:
 }
 
 define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvavg_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
-; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvavg_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   %va = load <4 x i64>, ptr %a
   %vb = load <4 x i64>, ptr %b
@@ -298,8 +289,8 @@ define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
 ; LA32-NEXT: xvld $xr0, $a1, 0
 ; LA32-NEXT: xvld $xr1, $a2, 0
 ; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
-; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
-; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA32-NEXT: xvrepli.d $xr1, 1
+; LA32-NEXT: xvavg.du $xr0, $xr0, $xr1
 ; LA32-NEXT: xvst $xr0, $a0, 0
 ; LA32-NEXT: ret
 ;
;
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define <64 x i8> @test_i8(<64 x i8> %shuffle) {
+; CHECK-LABEL: test_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.b $xr2, -85
+; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr2
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvmuh.bu $xr1, $xr1, $xr2
+; CHECK-NEXT: xvsrli.b $xr1, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %div = udiv <64 x i8> %shuffle, splat (i8 3)
+  ret <64 x i8> %div
+}
+
+define <32 x i16> @test_i16(<32 x i16> %shuffle) {
+; CHECK-LABEL: test_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a0, 10
+; CHECK-NEXT: ori $a0, $a0, 2731
+; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
+; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr2
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvmuh.hu $xr1, $xr1, $xr2
+; CHECK-NEXT: xvsrli.h $xr1, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %div = udiv <32 x i16> %shuffle, splat (i16 3)
+  ret <32 x i16> %div
+}
+
+define <16 x i32> @test_i32(<16 x i32> %shuffle) {
+; CHECK-LABEL: test_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a0, -349526
+; CHECK-NEXT: ori $a0, $a0, 2731
+; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
+; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr2
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvmuh.wu $xr1, $xr1, $xr2
+; CHECK-NEXT: xvsrli.w $xr1, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %div = udiv <16 x i32> %shuffle, splat (i32 3)
+  ret <16 x i32> %div
+}
+
+define <8 x i64> @test_i64(<8 x i64> %shuffle) {
+; LA32-LABEL: test_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvrepli.d $xr2, 3
+; LA32-NEXT: xvdiv.du $xr0, $xr0, $xr2
+; LA32-NEXT: xvdiv.du $xr1, $xr1, $xr2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, -349526
+; LA64-NEXT: ori $a0, $a0, 2731
+; LA64-NEXT: lu32i.d $a0, -349526
+; LA64-NEXT: lu52i.d $a0, $a0, -1366
+; LA64-NEXT: xvreplgr2vr.d $xr2, $a0
+; LA64-NEXT: xvmuh.du $xr0, $xr0, $xr2
+; LA64-NEXT: xvsrli.d $xr0, $xr0, 1
+; LA64-NEXT: xvmuh.du $xr1, $xr1, $xr2
+; LA64-NEXT: xvsrli.d $xr1, $xr1, 1
+; LA64-NEXT: ret
+entry:
+  %div = udiv <8 x i64> %shuffle, splat (i64 3)
+  ret <8 x i64> %div
+}

llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,7 @@ define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
 ; LA32: # %bb.0:
 ; LA32-NEXT: vslli.b $vr1, $vr0, 4
 ; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vandi.b $vr0, $vr0, 15
 ; LA32-NEXT: vor.v $vr0, $vr0, $vr1
 ; LA32-NEXT: vandi.b $vr1, $vr0, 51
 ; LA32-NEXT: vslli.b $vr1, $vr1, 2
@@ -116,6 +117,7 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
 ; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
 ; LA32-NEXT: vslli.b $vr1, $vr0, 4
 ; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vandi.b $vr0, $vr0, 15
 ; LA32-NEXT: vor.v $vr0, $vr0, $vr1
 ; LA32-NEXT: vandi.b $vr1, $vr0, 51
 ; LA32-NEXT: vslli.b $vr1, $vr1, 2

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll

Lines changed: 9 additions & 18 deletions
@@ -131,22 +131,13 @@ entry:
 }
 
 define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: vavg_du:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT: vsrli.d $vr0, $vr0, 1
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: vavg_du:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vld $vr1, $a2, 0
-; LA64-NEXT: vavg.du $vr0, $vr0, $vr1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: vavg_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   %va = load <2 x i64>, ptr %a
   %vb = load <2 x i64>, ptr %b
@@ -298,8 +289,8 @@ define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
 ; LA32-NEXT: vld $vr0, $a1, 0
 ; LA32-NEXT: vld $vr1, $a2, 0
 ; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
-; LA32-NEXT: vaddi.du $vr0, $vr0, 1
-; LA32-NEXT: vsrli.d $vr0, $vr0, 1
+; LA32-NEXT: vrepli.d $vr1, 1
+; LA32-NEXT: vavg.du $vr0, $vr0, $vr1
 ; LA32-NEXT: vst $vr0, $a0, 0
 ; LA32-NEXT: ret
 ;
