Skip to content

Commit b8375c5

Browse files
authored
Merge branch 'main' into users/rovka/remove-dvgpr-target-features
2 parents 82d978a + 3257dc3 commit b8375c5

File tree

10 files changed

+91
-78
lines changed

10 files changed

+91
-78
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2730,7 +2730,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
27302730
HasVMemStore = true;
27312731
}
27322732
for (const MachineOperand &Op : MI.all_uses()) {
2733-
if (!TRI->isVectorRegister(*MRI, Op.getReg()))
2733+
if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
27342734
continue;
27352735
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
27362736
// Vgpr use

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5573,7 +5573,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
55735573
llvm_unreachable("Unknown VFP cmp argument!");
55745574
}
55755575

5576-
/// OptimizeVFPBrcond - With nnan, it's legal to optimize some
5576+
/// OptimizeVFPBrcond - With nnan and without daz, it's legal to optimize some
55775577
/// f32 and even f64 comparisons to integer ones.
55785578
SDValue
55795579
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
@@ -5729,9 +5729,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
57295729
}
57305730

57315731
SDNodeFlags Flags = Op->getFlags();
5732-
if ((getTargetMachine().Options.UnsafeFPMath || Flags.hasNoNaNs()) &&
5733-
(DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&
5734-
DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE()) &&
5732+
if (Flags.hasNoNaNs() &&
5733+
DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&
5734+
DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE() &&
57355735
(CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE ||
57365736
CC == ISD::SETUNE)) {
57375737
if (SDValue Result = OptimizeVFPBrcond(Op, DAG))

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
222222
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
223223
const FeatureBitset &Bits = getFeatureBits();
224224
if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
225-
(Options.UnsafeFPMath || isTargetDarwin() ||
226-
DM == DenormalMode::getPreserveSign()))
225+
(isTargetDarwin() || DM == DenormalMode::getPreserveSign()))
227226
HasNEONForFP = true;
228227

229228
if (isRWPI())

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,7 @@ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
16031603
/// value is necessary in order to fit the above form.
16041604
static SDValue
16051605
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1606-
SDValue V1, SDValue V2, SelectionDAG &DAG,
1606+
SDValue V1, SelectionDAG &DAG,
16071607
const LoongArchSubtarget &Subtarget) {
16081608
int SplatIndex = -1;
16091609
for (const auto &M : Mask) {
@@ -1996,8 +1996,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
19961996
SDValue Result;
19971997
// TODO: Add more comparison patterns.
19981998
if (V2.isUndef()) {
1999-
if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
2000-
Subtarget)))
1999+
if ((Result =
2000+
lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
20012001
return Result;
20022002
if ((Result =
20032003
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
@@ -2053,7 +2053,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20532053
/// value is necessary in order to fit the above form.
20542054
static SDValue
20552055
lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2056-
SDValue V1, SDValue V2, SelectionDAG &DAG,
2056+
SDValue V1, SelectionDAG &DAG,
20572057
const LoongArchSubtarget &Subtarget) {
20582058
int SplatIndex = -1;
20592059
for (const auto &M : Mask) {
@@ -2096,10 +2096,29 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20962096
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
20972097
}
20982098

2099+
/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// Handles single-source shuffles of four 64-bit elements (v4i64 / v4f64) by
/// packing the shuffle mask into an immediate: result element i is selected
/// by the 2-bit field at bits [2*i+1 : 2*i] of the immediate. For example,
/// the mask <3, 1, 2, 0> encodes as 3 | (1 << 2) | (2 << 4) | (0 << 6) = 39.
///
/// \param DL        Debug location attached to the created nodes.
/// \param Mask      Shuffle mask; entries equal to -1 are skipped.
/// \param VT        Result vector type; must be v4i64 or v4f64 to match.
/// \param V1        The single source vector operand.
/// \param DAG       SelectionDAG used to build the replacement node.
/// \param Subtarget Supplies the GRLen value type used for the immediate.
/// \returns An XVPERMI node, or an empty SDValue when the type/size check
///          fails so the caller can try another lowering.
///
/// NOTE(review): assumes every non-negative mask index is < 4 (i.e. refers to
/// V1 only); the visible caller invokes this under `V2.isUndef()`, but the
/// range is not checked here -- confirm larger indices cannot reach this code.
2100+
static SDValue
2101+
lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2102+
SDValue V1, SelectionDAG &DAG,
2103+
const LoongArchSubtarget &Subtarget) {
2104+
// Only consider XVPERMI_D.
// Anything other than a 4-element mask on v4i64/v4f64 is rejected.
2105+
if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2106+
return SDValue();
2107+
2108+
unsigned MaskImm = 0;
// Pack each mask index into its own 2-bit field of the immediate.
2109+
for (unsigned i = 0; i < Mask.size(); ++i) {
// A -1 entry contributes nothing, so its field stays 0 (presumably an
// undef lane, for which any source element is acceptable -- confirm).
2110+
if (Mask[i] == -1)
2111+
continue;
2112+
MaskImm |= Mask[i] << (i * 2);
2113+
}
2114+
2115+
// Emit the permute with the packed immediate as a GRLen-typed constant.
return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2116+
DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2117+
}
2118+
20992119
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
21002120
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2101-
MVT VT, SDValue V1, SDValue V2,
2102-
SelectionDAG &DAG,
2121+
MVT VT, SDValue V1, SelectionDAG &DAG,
21032122
const LoongArchSubtarget &Subtarget) {
21042123
// LoongArch LASX only has XVPERM_W.
21052124
if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
@@ -2540,14 +2559,16 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25402559
SDValue Result;
25412560
// TODO: Add more comparison patterns.
25422561
if (V2.isUndef()) {
2543-
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
2544-
Subtarget)))
2562+
if ((Result =
2563+
lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
25452564
return Result;
25462565
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
25472566
Subtarget)))
25482567
return Result;
25492568
if ((Result =
2550-
lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2569+
lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2570+
return Result;
2571+
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
25512572
return Result;
25522573

25532574
// TODO: This comment may be enabled in the future to better match the

llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s -debugify-and-strip-all-safe | FileCheck -check-prefix=GFX9 %s
23
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
34
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
45

llvm/test/CodeGen/ARM/fnegs.ll

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
1111
; RUN: | FileCheck %s -check-prefix=CORTEXA8
1212

13-
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
13+
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
1414
; RUN: | FileCheck %s -check-prefix=CORTEXA8U
1515

1616
; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
17-
; RUN: | FileCheck %s -check-prefix=CORTEXA8U
17+
; RUN: | FileCheck %s -check-prefix=CORTEXA8U-DARWIN
1818

1919
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
2020
; RUN: | FileCheck %s -check-prefix=CORTEXA9
@@ -41,7 +41,10 @@ entry:
4141
; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
4242

4343
; CORTEXA8U-LABEL: test1:
44-
; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
44+
; CORTEXA8U: vsub.f32 d{{.*}}, d{{.*}}, d{{.*}}
45+
46+
; CORTEXA8U-DARWIN-LABEL: test1:
47+
; CORTEXA8U-DARWIN: vneg.f32 d{{.*}}, d{{.*}}
4548

4649
; CORTEXA9-LABEL: test1:
4750
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
@@ -110,9 +113,13 @@ define <2 x float> @fneg_bitcast(i64 %i) {
110113
; CORTEXA8-NOT: vneg.f32
111114

112115
; CORTEXA8U-LABEL: fneg_bitcast:
113-
; CORTEXA8U-DAG: eor r0, r0, #-2147483648
114-
; CORTEXA8U-DAG: eor r1, r1, #-2147483648
115-
; CORTEXA8U-NOT: vneg.f32
116+
; CORTEXA8U-DAG: vmov.i32 d{{.*}}, #0x80000000
117+
; CORTEXA8U-DAG: vsub.f32 d{{.*}}, d{{.*}}, d{{.*}}
118+
119+
; CORTEXA8U-DARWIN-LABEL: fneg_bitcast:
120+
; CORTEXA8U-DARWIN-DAG: eor r0, r0, #-2147483648
121+
; CORTEXA8U-DARWIN-DAG: eor r1, r1, #-2147483648
122+
; CORTEXA8U-DARWIN-NOT: vneg.f32
116123

117124
; CORTEXA9-LABEL: fneg_bitcast:
118125
; CORTEXA9-DAG: eor r0, r0, #-2147483648

llvm/test/CodeGen/ARM/fnmscs.ll

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \
1414
; RUN: | FileCheck %s -check-prefix=A8
1515

16-
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
16+
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
1717
; RUN: | FileCheck %s -check-prefix=A8U
1818

1919
; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
20-
; RUN: | FileCheck %s -check-prefix=A8U
20+
; RUN: | FileCheck %s -check-prefix=A8U-DARWIN
2121

2222
define float @t1(float %acc, float %a, float %b) nounwind {
2323
entry:
@@ -31,15 +31,20 @@ entry:
3131
; NEON: vnmla.f32
3232

3333
; A8U-LABEL: t1:
34-
; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
35-
; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
34+
; A8U: vmov.i32 d{{[0-9]+}}, #0x80000000
35+
; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
36+
; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
37+
38+
; A8U-DARWIN-LABEL: t1:
39+
; A8U-DARWIN: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
40+
; A8U-DARWIN: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
3641

3742
; A8-LABEL: t1:
3843
; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
3944
; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
4045
%0 = fmul float %a, %b
4146
%1 = fsub float -0.0, %0
42-
%2 = fsub float %1, %acc
47+
%2 = fsub float %1, %acc
4348
ret float %2
4449
}
4550

@@ -55,8 +60,13 @@ entry:
5560
; NEON: vnmla.f32
5661

5762
; A8U-LABEL: t2:
58-
; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
59-
; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
63+
; A8U: vmov.i32 d{{[0-9]+}}, #0x80000000
64+
; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
65+
; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
66+
67+
; A8U-DARWIN-LABEL: t2:
68+
; A8U-DARWIN: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
69+
; A8U-DARWIN: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
6070

6171
; A8-LABEL: t2:
6272
; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
@@ -79,8 +89,12 @@ entry:
7989
; NEON: vnmla.f64
8090

8191
; A8U-LABEL: t3:
82-
; A8U: vnmul.f64 d
8392
; A8U: vsub.f64 d
93+
; A8U: vsub.f64 d
94+
95+
; A8U-DARWIN-LABEL: t3:
96+
; A8U-DARWIN: vnmul.f64 d
97+
; A8U-DARWIN: vsub.f64 d
8498

8599
; A8-LABEL: t3:
86100
; A8: vnmul.f64 d
@@ -103,8 +117,12 @@ entry:
103117
; NEON: vnmla.f64
104118

105119
; A8U-LABEL: t4:
106-
; A8U: vnmul.f64 d
107120
; A8U: vsub.f64 d
121+
; A8U: vsub.f64 d
122+
123+
; A8U-DARWIN-LABEL: t4:
124+
; A8U-DARWIN: vnmul.f64 d
125+
; A8U-DARWIN: vsub.f64 d
108126

109127
; A8-LABEL: t4:
110128
; A8: vnmul.f64 d

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77
define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
88
; CHECK-LABEL: shufflevector_v4f64:
99
; CHECK: # %bb.0: # %entry
10-
; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3
11-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238
12-
; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1
13-
; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
10+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 3
11+
; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
12+
; CHECK-NEXT: vextrins.d $vr2, $vr3, 16
1413
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2
1514
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
16-
; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
15+
; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
1716
; CHECK-NEXT: ret
1817
entry:
1918
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>

llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll

Lines changed: 9 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
66
; CHECK-LABEL: shuffle_v32i8:
77
; CHECK: # %bb.0:
88
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
9-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
10-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
11-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
12-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
13-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
9+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
10+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
1411
; CHECK-NEXT: xvshuf.h $xr1, $xr2, $xr0
1512
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
1613
; CHECK-NEXT: ret
@@ -34,11 +31,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
3431
; CHECK-LABEL: shuffle_v16i16:
3532
; CHECK: # %bb.0:
3633
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
37-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
38-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
39-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
40-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
41-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
34+
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
35+
; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
4236
; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
4337
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
4438
; CHECK-NEXT: ret
@@ -72,10 +66,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
7266
define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
7367
; CHECK-LABEL: shuffle_v8i32_same_lane:
7468
; CHECK: # %bb.0:
75-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
76-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
77-
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
78-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
69+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
7970
; CHECK-NEXT: ret
8071
%shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
8172
ret <8 x i32> %shuffle
@@ -84,14 +75,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
8475
define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
8576
; CHECK-LABEL: shuffle_v4i64:
8677
; CHECK: # %bb.0:
87-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
88-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
89-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1)
90-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
91-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
92-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
93-
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
94-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
78+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
9579
; CHECK-NEXT: ret
9680
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
9781
ret <4 x i64> %shuffle
@@ -100,10 +84,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
10084
define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
10185
; CHECK-LABEL: shuffle_v4i64_same_lane:
10286
; CHECK: # %bb.0:
103-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
104-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
105-
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
106-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
87+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
10788
; CHECK-NEXT: ret
10889
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
10990
ret <4 x i64> %shuffle
@@ -136,14 +117,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
136117
define <4 x double> @shuffle_v4f64(<4 x double> %a) {
137118
; CHECK-LABEL: shuffle_v4f64:
138119
; CHECK: # %bb.0:
139-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
140-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
141-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1)
142-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
143-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
144-
; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
145-
; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
146-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
120+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
147121
; CHECK-NEXT: ret
148122
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
149123
ret <4 x double> %shuffle
@@ -152,11 +126,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
152126
define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
153127
; CHECK-LABEL: shuffle_v4f64_same_lane:
154128
; CHECK: # %bb.0:
155-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
156-
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
157-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
158-
; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
159-
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
129+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 75
160130
; CHECK-NEXT: ret
161131
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
162132
ret <4 x double> %shuffle

llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
127127
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
128128
; CHECK-LABEL: byte_rotate_v4i64_3:
129129
; CHECK: # %bb.0:
130-
; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8
131-
; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
132-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
130+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177
133131
; CHECK-NEXT: ret
134132
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
135133
ret <4 x i64> %shuffle

0 commit comments

Comments
 (0)