Skip to content

Commit bd0769e

Browse files
authored
[LoongArch] Make rotl/rotr custom for lsx/lasx (#161154)
1 parent 1e7efca commit bd0769e

File tree

6 files changed

+140
-142
lines changed

6 files changed

+140
-142
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
352352
setOperationAction(ISD::SSUBSAT, VT, Legal);
353353
setOperationAction(ISD::UADDSAT, VT, Legal);
354354
setOperationAction(ISD::USUBSAT, VT, Legal);
355+
setOperationAction(ISD::ROTL, VT, Custom);
356+
setOperationAction(ISD::ROTR, VT, Custom);
355357
}
356358
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
357359
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -440,6 +442,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
440442
setOperationAction(ISD::UADDSAT, VT, Legal);
441443
setOperationAction(ISD::USUBSAT, VT, Legal);
442444
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
445+
setOperationAction(ISD::ROTL, VT, Custom);
446+
setOperationAction(ISD::ROTR, VT, Custom);
443447
}
444448
for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445449
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -601,6 +605,9 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
601605
return lowerBF16_TO_FP(Op, DAG);
602606
case ISD::VECREDUCE_ADD:
603607
return lowerVECREDUCE_ADD(Op, DAG);
608+
case ISD::ROTL:
609+
case ISD::ROTR:
610+
return lowerRotate(Op, DAG);
604611
case ISD::VECREDUCE_AND:
605612
case ISD::VECREDUCE_OR:
606613
case ISD::VECREDUCE_XOR:
@@ -827,6 +834,58 @@ SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
827834
return Op;
828835
}
829836

837+
// Lower vector ISD::ROTL/ISD::ROTR for LSX/LASX.
//
// LoongArch only provides right-rotate vector instructions
// ([X]VROTR[I].{B/H/W/D}), so ISD::ROTL is rewritten as ISD::ROTR with a
// negated amount. A constant-splat amount is reduced modulo the element
// width so the immediate ([X]VROTRI) patterns can match later.
SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
                                             SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Returns true (and sets CstSplatValue) if V is a BUILD_VECTOR whose
  // lanes all hold the same integer constant.
  auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      return false;
    if (SDValue SplatValue =
            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
      if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
        CstSplatValue = C->getAPIntValue();
        return true;
      }
    }
    return false;
  };

  // Check for constant splat rotation amount.
  APInt CstSplatValue;
  bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
  bool IsROTL = Opcode == ISD::ROTL;

  // Check for splat rotate by zero: rotating by a multiple of the element
  // width is a no-op.
  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
    return R;

  // LoongArch targets always prefer ISD::ROTR, since only right-rotate
  // instructions exist: rotl(R, Amt) == rotr(R, -Amt).
  if (IsROTL) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::ROTR, DL, VT, R,
                       DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
  }

  // Rotate by an immediate.
  if (IsCstSplat) {
    // ISD::ROTR: Attempt to rotate by a positive immediate so the
    // splat-uimm patterns can select [X]VROTRI.
    SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
    if (SDValue Urem =
            DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
      return DAG.getNode(Opcode, DL, VT, R, Urem);
  }

  return Op;
}
888+
830889
// Return true if Val is equal to (setcc LHS, RHS, CC).
831890
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
832891
// Otherwise, return std::nullopt.

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ class LoongArchTargetLowering : public TargetLowering {
239239
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
240240
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
241241
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
242+
SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
242243

243244
bool isFPImmLegal(const APFloat &Imm, EVT VT,
244245
bool ForCodeSize) const override;

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,11 @@ defm : PatXrXr<sra, "XVSRA">;
14471447
defm : PatShiftXrXr<sra, "XVSRA">;
14481448
defm : PatShiftXrSplatUimm<sra, "XVSRAI">;
14491449

1450+
// XVROTR[I]_{B/H/W/D}
1451+
defm : PatXrXr<rotr, "XVROTR">;
1452+
defm : PatShiftXrXr<rotr, "XVROTR">;
1453+
defm : PatShiftXrSplatUimm<rotr, "XVROTRI">;
1454+
14501455
// XVCLZ_{B/H/W/D}
14511456
defm : PatXr<ctlz, "XVCLZ">;
14521457

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,6 +1657,11 @@ defm : PatVrVr<sra, "VSRA">;
16571657
defm : PatShiftVrVr<sra, "VSRA">;
16581658
defm : PatShiftVrSplatUimm<sra, "VSRAI">;
16591659

1660+
// VROTR[I]_{B/H/W/D}
1661+
defm : PatVrVr<rotr, "VROTR">;
1662+
defm : PatShiftVrVr<rotr, "VROTR">;
1663+
defm : PatShiftVrSplatUimm<rotr, "VROTRI">;
1664+
16601665
// VCLZ_{B/H/W/D}
16611666
defm : PatVr<ctlz, "VCLZ">;
16621667

llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll

Lines changed: 36 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,8 @@ define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
77
; CHECK: # %bb.0:
88
; CHECK-NEXT: xvld $xr0, $a1, 0
99
; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
10-
; CHECK-NEXT: xvrepli.b $xr2, 8
11-
; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
12-
; CHECK-NEXT: xvsll.b $xr1, $xr0, $xr1
13-
; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr2
14-
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
10+
; CHECK-NEXT: xvneg.b $xr1, $xr1
11+
; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
1512
; CHECK-NEXT: xvst $xr0, $a0, 0
1613
; CHECK-NEXT: ret
1714
%v0 = load <32 x i8>, ptr %src
@@ -30,11 +27,7 @@ define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
3027
; CHECK: # %bb.0:
3128
; CHECK-NEXT: xvld $xr0, $a1, 0
3229
; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
33-
; CHECK-NEXT: xvrepli.b $xr2, 8
34-
; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
35-
; CHECK-NEXT: xvsrl.b $xr1, $xr0, $xr1
36-
; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr2
37-
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
30+
; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
3831
; CHECK-NEXT: xvst $xr0, $a0, 0
3932
; CHECK-NEXT: ret
4033
%v0 = load <32 x i8>, ptr %src
@@ -52,9 +45,7 @@ define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind {
5245
; CHECK-LABEL: rotr_v32i8_imm:
5346
; CHECK: # %bb.0:
5447
; CHECK-NEXT: xvld $xr0, $a1, 0
55-
; CHECK-NEXT: xvsrli.b $xr1, $xr0, 2
56-
; CHECK-NEXT: xvslli.b $xr0, $xr0, 6
57-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
48+
; CHECK-NEXT: xvrotri.b $xr0, $xr0, 2
5849
; CHECK-NEXT: xvst $xr0, $a0, 0
5950
; CHECK-NEXT: ret
6051
%v0 = load <32 x i8>, ptr %src
@@ -70,11 +61,8 @@ define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
7061
; CHECK: # %bb.0:
7162
; CHECK-NEXT: xvld $xr0, $a1, 0
7263
; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
73-
; CHECK-NEXT: xvrepli.h $xr2, 16
74-
; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
75-
; CHECK-NEXT: xvsll.h $xr1, $xr0, $xr1
76-
; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr2
77-
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
64+
; CHECK-NEXT: xvneg.h $xr1, $xr1
65+
; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
7866
; CHECK-NEXT: xvst $xr0, $a0, 0
7967
; CHECK-NEXT: ret
8068
%v0 = load <16 x i16>, ptr %src
@@ -93,11 +81,7 @@ define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
9381
; CHECK: # %bb.0:
9482
; CHECK-NEXT: xvld $xr0, $a1, 0
9583
; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
96-
; CHECK-NEXT: xvrepli.h $xr2, 16
97-
; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
98-
; CHECK-NEXT: xvsrl.h $xr1, $xr0, $xr1
99-
; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr2
100-
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
84+
; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
10185
; CHECK-NEXT: xvst $xr0, $a0, 0
10286
; CHECK-NEXT: ret
10387
%v0 = load <16 x i16>, ptr %src
@@ -115,9 +99,7 @@ define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind {
11599
; CHECK-LABEL: rotr_v16i16_imm:
116100
; CHECK: # %bb.0:
117101
; CHECK-NEXT: xvld $xr0, $a1, 0
118-
; CHECK-NEXT: xvsrli.h $xr1, $xr0, 2
119-
; CHECK-NEXT: xvslli.h $xr0, $xr0, 14
120-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
102+
; CHECK-NEXT: xvrotri.h $xr0, $xr0, 2
121103
; CHECK-NEXT: xvst $xr0, $a0, 0
122104
; CHECK-NEXT: ret
123105
%v0 = load <16 x i16>, ptr %src
@@ -133,11 +115,8 @@ define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
133115
; CHECK: # %bb.0:
134116
; CHECK-NEXT: xvld $xr0, $a1, 0
135117
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
136-
; CHECK-NEXT: xvrepli.w $xr2, 32
137-
; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
138-
; CHECK-NEXT: xvsll.w $xr1, $xr0, $xr1
139-
; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr2
140-
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
118+
; CHECK-NEXT: xvneg.w $xr1, $xr1
119+
; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
141120
; CHECK-NEXT: xvst $xr0, $a0, 0
142121
; CHECK-NEXT: ret
143122
%v0 = load <8 x i32>, ptr %src
@@ -156,11 +135,7 @@ define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
156135
; CHECK: # %bb.0:
157136
; CHECK-NEXT: xvld $xr0, $a1, 0
158137
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
159-
; CHECK-NEXT: xvrepli.w $xr2, 32
160-
; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
161-
; CHECK-NEXT: xvsrl.w $xr1, $xr0, $xr1
162-
; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr2
163-
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
138+
; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
164139
; CHECK-NEXT: xvst $xr0, $a0, 0
165140
; CHECK-NEXT: ret
166141
%v0 = load <8 x i32>, ptr %src
@@ -178,9 +153,7 @@ define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind {
178153
; CHECK-LABEL: rotr_v8i32_imm:
179154
; CHECK: # %bb.0:
180155
; CHECK-NEXT: xvld $xr0, $a1, 0
181-
; CHECK-NEXT: xvsrli.w $xr1, $xr0, 2
182-
; CHECK-NEXT: xvslli.w $xr0, $xr0, 30
183-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
156+
; CHECK-NEXT: xvrotri.w $xr0, $xr0, 2
184157
; CHECK-NEXT: xvst $xr0, $a0, 0
185158
; CHECK-NEXT: ret
186159
%v0 = load <8 x i32>, ptr %src
@@ -196,25 +169,19 @@ define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
196169
; LA32: # %bb.0:
197170
; LA32-NEXT: xvld $xr0, $a1, 0
198171
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
199-
; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
200-
; LA32-NEXT: xvreplve0.d $xr1, $xr1
201-
; LA32-NEXT: xvrepli.d $xr2, 64
202-
; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
203-
; LA32-NEXT: xvsll.d $xr1, $xr0, $xr1
204-
; LA32-NEXT: xvsrl.d $xr0, $xr0, $xr2
205-
; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
172+
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
173+
; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
174+
; LA32-NEXT: xvneg.d $xr1, $xr1
175+
; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
206176
; LA32-NEXT: xvst $xr0, $a0, 0
207177
; LA32-NEXT: ret
208178
;
209179
; LA64-LABEL: rotl_v4i64:
210180
; LA64: # %bb.0:
211181
; LA64-NEXT: xvld $xr0, $a1, 0
212182
; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
213-
; LA64-NEXT: xvrepli.d $xr2, 64
214-
; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
215-
; LA64-NEXT: xvsll.d $xr1, $xr0, $xr1
216-
; LA64-NEXT: xvsrl.d $xr0, $xr0, $xr2
217-
; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
183+
; LA64-NEXT: xvneg.d $xr1, $xr1
184+
; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
218185
; LA64-NEXT: xvst $xr0, $a0, 0
219186
; LA64-NEXT: ret
220187
%v0 = load <4 x i64>, ptr %src
@@ -233,25 +200,17 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
233200
; LA32: # %bb.0:
234201
; LA32-NEXT: xvld $xr0, $a1, 0
235202
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
236-
; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
237-
; LA32-NEXT: xvreplve0.d $xr1, $xr1
238-
; LA32-NEXT: xvrepli.d $xr2, 64
239-
; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
240-
; LA32-NEXT: xvsrl.d $xr1, $xr0, $xr1
241-
; LA32-NEXT: xvsll.d $xr0, $xr0, $xr2
242-
; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
203+
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
204+
; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
205+
; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
243206
; LA32-NEXT: xvst $xr0, $a0, 0
244207
; LA32-NEXT: ret
245208
;
246209
; LA64-LABEL: rotr_v4i64:
247210
; LA64: # %bb.0:
248211
; LA64-NEXT: xvld $xr0, $a1, 0
249212
; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
250-
; LA64-NEXT: xvrepli.d $xr2, 64
251-
; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
252-
; LA64-NEXT: xvsrl.d $xr1, $xr0, $xr1
253-
; LA64-NEXT: xvsll.d $xr0, $xr0, $xr2
254-
; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
213+
; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
255214
; LA64-NEXT: xvst $xr0, $a0, 0
256215
; LA64-NEXT: ret
257216
%v0 = load <4 x i64>, ptr %src
@@ -266,14 +225,20 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
266225
}
267226

268227
define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind {
269-
; CHECK-LABEL: rotr_v4i64_imm:
270-
; CHECK: # %bb.0:
271-
; CHECK-NEXT: xvld $xr0, $a1, 0
272-
; CHECK-NEXT: xvsrli.d $xr1, $xr0, 2
273-
; CHECK-NEXT: xvslli.d $xr0, $xr0, 62
274-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
275-
; CHECK-NEXT: xvst $xr0, $a0, 0
276-
; CHECK-NEXT: ret
228+
; LA32-LABEL: rotr_v4i64_imm:
229+
; LA32: # %bb.0:
230+
; LA32-NEXT: xvld $xr0, $a1, 0
231+
; LA32-NEXT: xvrepli.w $xr1, -62
232+
; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
233+
; LA32-NEXT: xvst $xr0, $a0, 0
234+
; LA32-NEXT: ret
235+
;
236+
; LA64-LABEL: rotr_v4i64_imm:
237+
; LA64: # %bb.0:
238+
; LA64-NEXT: xvld $xr0, $a1, 0
239+
; LA64-NEXT: xvrotri.d $xr0, $xr0, 2
240+
; LA64-NEXT: xvst $xr0, $a0, 0
241+
; LA64-NEXT: ret
277242
%v0 = load <4 x i64>, ptr %src
278243
%b = lshr <4 x i64> %v0, splat (i64 2)
279244
%c = shl <4 x i64> %v0, splat (i64 62)

0 commit comments

Comments
 (0)