Skip to content

Commit 5a6dfbb

Browse files
committed
[ARM] Teach DemandedVectorElts about VMOVN lanes
Instructions that write only the narrow top or bottom lanes of their result demand only the even or odd elements of the input vector, which means that a VMOVNT; VMOVNB pair demands no lanes from the original input. This commit teaches that to instcombine through the target hooks available in ARMTTIImpl. Differential Revision: https://reviews.llvm.org/D109325
1 parent a91cfd1 commit 5a6dfbb

File tree

3 files changed

+57
-16
lines changed

3 files changed

+57
-16
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,48 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
248248
return None;
249249
}
250250

251+
/// Target hook that narrows the demanded vector elements of the MVE
/// top/bottom narrowing intrinsics (arm_mve_vcvt_narrow, arm_mve_vqmovn and
/// arm_mve_vshrn).
///
/// \param II               The intrinsic call being simplified.
/// \param OrigDemandedElts Elements of the result of \p II that are demanded.
/// \param UndefElts        Updated for the handled intrinsics so that it only
///                         covers result lanes passed through from operand 0.
/// \param SimplifyAndSetOp Callback used to simplify operand 0 of \p II given
///                         the elements demanded from it.
///
/// \p IC, \p UndefElts2 and \p UndefElts3 are not used here.
///
/// \returns None in every case: operands are simplified in place through
/// \p SimplifyAndSetOp and no replacement value is produced.
Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {

  // Compute the demanded elements for a narrowing MVE intrinsic. TopOpc is
  // the index of the constant operand holding the Top/Bottom flag, which
  // differs between intrinsics.
  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
    unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();

    // Only every other lane of operand 0 is demanded: the even-indexed lanes
    // when the Top flag is set, the odd-indexed lanes otherwise. These are
    // the lanes that pass through to the result.
    APInt PassthruElts =
        APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
                                       : APInt::getHighBitsSet(2, 1));
    SimplifyAndSetOp(&II, 0, OrigDemandedElts & PassthruElts, UndefElts);

    // The remaining lanes are defined by the inserted elements, so only the
    // lanes taken from operand 0 can still be undef in the result. (The
    // previous code masked with the opposite lane set, which marked the
    // freshly written lanes as undef instead.)
    UndefElts &= PassthruElts;
  };

  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
    break;
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
    break;
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
    break;
  }

  return None;
}
292+
251293
InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
252294
TTI::TargetCostKind CostKind) {
253295
assert(Ty->isIntegerTy());

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
120120

121121
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
122122
IntrinsicInst &II) const;
123+
/// Target hook through which instcombine simplifies the demanded vector
/// elements of an ARM intrinsic call. Defined in ARMTargetTransformInfo.cpp.
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
124+
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
125+
APInt &UndefElts2, APInt &UndefElts3,
126+
std::function<void(Instruction *, unsigned, APInt, APInt &)>
127+
SimplifyAndSetOp) const;
123128

124129
/// \name Scalar TTI Implementations
125130
/// @{

llvm/test/Transforms/InstCombine/ARM/mve-narrow.ll

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
77

88
define <8 x i16> @test_shrn_v8i16_t1(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
99
; CHECK-LABEL: @test_shrn_v8i16_t1(
10-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 1, i16 -1, i16 1, i16 -1, i16 1, i16 -1, i16 1, i16 -1>
10+
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 1, i16 poison, i16 1, i16 poison, i16 1, i16 poison, i16 1, i16 poison>
1111
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 1)
1212
; CHECK-NEXT: ret <8 x i16> [[Z]]
1313
;
@@ -18,7 +18,7 @@ define <8 x i16> @test_shrn_v8i16_t1(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <
1818

1919
define <8 x i16> @test_shrn_v8i16_t2(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
2020
; CHECK-LABEL: @test_shrn_v8i16_t2(
21-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 -1, i16 1, i16 -1, i16 1, i16 -1, i16 1, i16 -1, i16 1>
21+
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 -1, i16 poison, i16 -1, i16 poison, i16 -1, i16 poison, i16 -1, i16 poison>
2222
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 1)
2323
; CHECK-NEXT: ret <8 x i16> [[Z]]
2424
;
@@ -29,7 +29,7 @@ define <8 x i16> @test_shrn_v8i16_t2(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <
2929

3030
define <8 x i16> @test_shrn_v8i16_b1(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
3131
; CHECK-LABEL: @test_shrn_v8i16_b1(
32-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 1, i16 -1, i16 1, i16 -1, i16 1, i16 -1, i16 1, i16 -1>
32+
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 poison, i16 -1, i16 poison, i16 -1, i16 poison, i16 -1, i16 poison, i16 -1>
3333
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
3434
; CHECK-NEXT: ret <8 x i16> [[Z]]
3535
;
@@ -40,7 +40,7 @@ define <8 x i16> @test_shrn_v8i16_b1(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <
4040

4141
define <8 x i16> @test_shrn_v8i16_b2(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
4242
; CHECK-LABEL: @test_shrn_v8i16_b2(
43-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 -1, i16 1, i16 -1, i16 1, i16 -1, i16 1, i16 -1, i16 1>
43+
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], <i16 poison, i16 1, i16 poison, i16 1, i16 poison, i16 1, i16 poison, i16 1>
4444
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
4545
; CHECK-NEXT: ret <8 x i16> [[Z]]
4646
;
@@ -51,8 +51,7 @@ define <8 x i16> @test_shrn_v8i16_b2(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <
5151

5252
define <8 x i16> @test_shrn_v8i16_bt(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
5353
; CHECK-LABEL: @test_shrn_v8i16_bt(
54-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], [[B:%.*]]
55-
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[C:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
54+
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> [[C:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
5655
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[Y]], <4 x i32> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 1)
5756
; CHECK-NEXT: ret <8 x i16> [[Z]]
5857
;
@@ -64,8 +63,7 @@ define <8 x i16> @test_shrn_v8i16_bt(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <
6463

6564
define <8 x i16> @test_shrn_v8i16_tb(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
6665
; CHECK-LABEL: @test_shrn_v8i16_tb(
67-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], [[B:%.*]]
68-
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[C:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 1)
66+
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> poison, <4 x i32> [[C:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 1)
6967
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[Y]], <4 x i32> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
7068
; CHECK-NEXT: ret <8 x i16> [[Z]]
7169
;
@@ -105,8 +103,7 @@ define <8 x i16> @test_shrn_v8i16_tt(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <
105103

106104
define <16 x i8> @test_shrn_v16i8_bt(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c, <8 x i16> %d) {
107105
; CHECK-LABEL: @test_shrn_v16i8_bt(
108-
; CHECK-NEXT: [[X:%.*]] = add <16 x i8> [[A:%.*]], [[B:%.*]]
109-
; CHECK-NEXT: [[Y:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[X]], <8 x i16> [[C:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
106+
; CHECK-NEXT: [[Y:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> poison, <8 x i16> [[C:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 0)
110107
; CHECK-NEXT: [[Z:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[Y]], <8 x i16> [[D:%.*]], i32 16, i32 0, i32 0, i32 0, i32 0, i32 1)
111108
; CHECK-NEXT: ret <16 x i8> [[Z]]
112109
;
@@ -171,8 +168,7 @@ define <16 x i8> @test_movnp_v16i8_bt(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c,
171168

172169
define <8 x i16> @test_qmovn_v8i16_bt(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c, <4 x i32> %d) {
173170
; CHECK-LABEL: @test_qmovn_v8i16_bt(
174-
; CHECK-NEXT: [[X:%.*]] = add <8 x i16> [[A:%.*]], [[B:%.*]]
175-
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> [[X]], <4 x i32> [[C:%.*]], i32 0, i32 0, i32 0)
171+
; CHECK-NEXT: [[Y:%.*]] = call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> poison, <4 x i32> [[C:%.*]], i32 0, i32 0, i32 0)
176172
; CHECK-NEXT: [[Z:%.*]] = call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> [[Y]], <4 x i32> [[D:%.*]], i32 0, i32 0, i32 1)
177173
; CHECK-NEXT: ret <8 x i16> [[Z]]
178174
;
@@ -184,8 +180,7 @@ define <8 x i16> @test_qmovn_v8i16_bt(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c,
184180

185181
define <16 x i8> @test_qmovn_v16i8_bt(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c, <8 x i16> %d) {
186182
; CHECK-LABEL: @test_qmovn_v16i8_bt(
187-
; CHECK-NEXT: [[X:%.*]] = add <16 x i8> [[A:%.*]], [[B:%.*]]
188-
; CHECK-NEXT: [[Y:%.*]] = call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> [[X]], <8 x i16> [[C:%.*]], i32 0, i32 0, i32 0)
183+
; CHECK-NEXT: [[Y:%.*]] = call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> poison, <8 x i16> [[C:%.*]], i32 0, i32 0, i32 0)
189184
; CHECK-NEXT: [[Z:%.*]] = call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> [[Y]], <8 x i16> [[D:%.*]], i32 0, i32 0, i32 1)
190185
; CHECK-NEXT: ret <16 x i8> [[Z]]
191186
;
@@ -223,8 +218,7 @@ define <16 x i8> @test_qmovnp_v16i8_bt(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c,
223218

224219
define <8 x half> @test_cvtn_v8i16_bt(<8 x half> %a, <8 x half> %b, <4 x float> %c, <4 x float> %d) {
225220
; CHECK-LABEL: @test_cvtn_v8i16_bt(
226-
; CHECK-NEXT: [[X:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
227-
; CHECK-NEXT: [[Y:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[X]], <4 x float> [[C:%.*]], i32 0)
221+
; CHECK-NEXT: [[Y:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> poison, <4 x float> [[C:%.*]], i32 0)
228222
; CHECK-NEXT: [[Z:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[Y]], <4 x float> [[D:%.*]], i32 1)
229223
; CHECK-NEXT: ret <8 x half> [[Z]]
230224
;

0 commit comments

Comments
 (0)