Skip to content

Commit d9d71bd

Browse files
authored
[AArch64] Move BSL generation to lowering. (#151855)
It is generally better to allow the target-independent combines to run before creating AArch64-specific nodes (provided they don't mess it up). This moves the generation of BSL nodes from a DAG combine to lowering, so that intermediate nodes are more likely to be optimized. There is a small change in the constant handling to detect legalized buildvector arguments correctly. Fixes #149380, but not directly; #151856 contained a direct fix for expanding the pseudos.
1 parent 17a98f8 commit d9d71bd

File tree

5 files changed

+100
-121
lines changed

5 files changed

+100
-121
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,7 +1121,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11211121
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
11221122
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
11231123

1124-
// We combine OR nodes for bitfield operations.
1124+
// We combine OR nodes for ccmp operations.
11251125
setTargetDAGCombine(ISD::OR);
11261126
// Try to create BICs for vector ANDs.
11271127
setTargetDAGCombine(ISD::AND);
@@ -14799,23 +14799,15 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
1479914799
return ResultSLI;
1480014800
}
1480114801

14802-
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14803-
const AArch64TargetLowering &TLI) {
14802+
static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG) {
1480414803
EVT VT = N->getValueType(0);
14805-
SelectionDAG &DAG = DCI.DAG;
14804+
assert(VT.isVector() && "Expected vector type in tryLowerToBSL\n");
1480614805
SDLoc DL(N);
1480714806
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
1480814807

14809-
if (!VT.isVector())
14810-
return SDValue();
14811-
1481214808
if (VT.isScalableVector() && !Subtarget.hasSVE2())
1481314809
return SDValue();
1481414810

14815-
if (VT.isFixedLengthVector() &&
14816-
(!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT)))
14817-
return SDValue();
14818-
1481914811
SDValue N0 = N->getOperand(0);
1482014812
if (N0.getOpcode() != ISD::AND)
1482114813
return SDValue();
@@ -14865,14 +14857,13 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1486514857
// We only have to look for constant vectors here since the general, variable
1486614858
// case can be handled in TableGen.
1486714859
unsigned Bits = VT.getScalarSizeInBits();
14868-
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
1486914860
for (int i = 1; i >= 0; --i)
1487014861
for (int j = 1; j >= 0; --j) {
1487114862
APInt Val1, Val2;
1487214863

1487314864
if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) &&
1487414865
ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) &&
14875-
(BitMask & ~Val1.getZExtValue()) == Val2.getZExtValue()) {
14866+
~Val1.trunc(Bits) == Val2.trunc(Bits)) {
1487614867
return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i),
1487714868
N0->getOperand(1 - i), N1->getOperand(1 - j));
1487814869
}
@@ -14886,7 +14877,8 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1488614877
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
1488714878
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
1488814879
if (!CN0 || !CN1 ||
14889-
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
14880+
CN0->getAPIntValue().trunc(Bits) !=
14881+
~CN1->getAsAPIntVal().trunc(Bits)) {
1489014882
FoundMatch = false;
1489114883
break;
1489214884
}
@@ -14905,6 +14897,9 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
1490514897
!Subtarget->isNeonAvailable()))
1490614898
return LowerToScalableOp(Op, DAG);
1490714899

14900+
if (SDValue Res = tryLowerToBSL(Op, DAG))
14901+
return Res;
14902+
1490814903
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
1490914904
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
1491014905
return Res;
@@ -19658,17 +19653,10 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1965819653
const AArch64Subtarget *Subtarget,
1965919654
const AArch64TargetLowering &TLI) {
1966019655
SelectionDAG &DAG = DCI.DAG;
19661-
EVT VT = N->getValueType(0);
1966219656

1966319657
if (SDValue R = performANDORCSELCombine(N, DAG))
1966419658
return R;
1966519659

19666-
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
19667-
return SDValue();
19668-
19669-
if (SDValue Res = tryCombineToBSL(N, DCI, TLI))
19670-
return Res;
19671-
1967219660
return SDValue();
1967319661
}
1967419662

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -155,16 +155,12 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
155155
; SDAG: // %bb.0:
156156
; SDAG-NEXT: movi v1.16b, #171
157157
; SDAG-NEXT: adrp x8, .LCPI4_0
158-
; SDAG-NEXT: adrp x9, .LCPI4_1
159-
; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
160158
; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
161159
; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
162-
; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
163160
; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
164161
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
165162
; SDAG-NEXT: ushr v1.16b, v1.16b, #7
166-
; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
167-
; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
163+
; SDAG-NEXT: bif v0.16b, v1.16b, v2.16b
168164
; SDAG-NEXT: ret
169165
;
170166
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
@@ -192,7 +188,6 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
192188
; SDAG-LABEL: pr38477:
193189
; SDAG: // %bb.0:
194190
; SDAG-NEXT: adrp x8, .LCPI5_0
195-
; SDAG-NEXT: adrp x9, .LCPI5_4
196191
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
197192
; SDAG-NEXT: adrp x8, .LCPI5_1
198193
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
@@ -203,16 +198,13 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
203198
; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
204199
; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
205200
; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
206-
; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
207-
; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
208201
; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
209202
; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
210203
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
211204
; SDAG-NEXT: adrp x8, .LCPI5_3
212205
; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
213206
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
214-
; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
215-
; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
207+
; SDAG-NEXT: bif v0.16b, v1.16b, v2.16b
216208
; SDAG-NEXT: ret
217209
;
218210
; GISEL-LABEL: pr38477:

llvm/test/CodeGen/AArch64/combine-sdiv.ll

Lines changed: 34 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,11 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
230230
; CHECK-SD-NEXT: movi v3.2d, #0x000000000000ff
231231
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
232232
; CHECK-SD-NEXT: adrp x8, .LCPI14_1
233-
; CHECK-SD-NEXT: movi v4.2d, #0xffffffffffffff00
234233
; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
235234
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
236235
; CHECK-SD-NEXT: add v1.16b, v0.16b, v1.16b
237-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v3.16b
238236
; CHECK-SD-NEXT: sshl v1.16b, v1.16b, v2.16b
239-
; CHECK-SD-NEXT: and v1.16b, v1.16b, v4.16b
240-
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
237+
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v3.16b
241238
; CHECK-SD-NEXT: ret
242239
;
243240
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
@@ -265,21 +262,17 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
265262
define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
266263
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
267264
; CHECK-SD: // %bb.0:
268-
; CHECK-SD-NEXT: adrp x8, .LCPI15_1
265+
; CHECK-SD-NEXT: adrp x8, .LCPI15_0
269266
; CHECK-SD-NEXT: cmlt v1.8h, v0.8h, #0
270-
; CHECK-SD-NEXT: adrp x9, .LCPI15_3
267+
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
268+
; CHECK-SD-NEXT: adrp x8, .LCPI15_1
269+
; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
271270
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
272271
; CHECK-SD-NEXT: adrp x8, .LCPI15_2
273-
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI15_3]
274-
; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
275-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_2]
276-
; CHECK-SD-NEXT: adrp x8, .LCPI15_0
277272
; CHECK-SD-NEXT: add v1.8h, v0.8h, v1.8h
278273
; CHECK-SD-NEXT: sshl v1.8h, v1.8h, v2.8h
279-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
280-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
281-
; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
282-
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
274+
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI15_2]
275+
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
283276
; CHECK-SD-NEXT: ret
284277
;
285278
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
@@ -308,28 +301,22 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
308301
define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
309302
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
310303
; CHECK-SD: // %bb.0:
311-
; CHECK-SD-NEXT: adrp x8, .LCPI16_1
304+
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
312305
; CHECK-SD-NEXT: cmlt v2.8h, v0.8h, #0
313306
; CHECK-SD-NEXT: cmlt v3.8h, v1.8h, #0
314-
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_1]
315-
; CHECK-SD-NEXT: adrp x8, .LCPI16_2
307+
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
308+
; CHECK-SD-NEXT: adrp x8, .LCPI16_1
316309
; CHECK-SD-NEXT: ushl v2.8h, v2.8h, v4.8h
317310
; CHECK-SD-NEXT: ushl v3.8h, v3.8h, v4.8h
318-
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
319-
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
320-
; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI16_0]
321-
; CHECK-SD-NEXT: adrp x8, .LCPI16_3
311+
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_1]
312+
; CHECK-SD-NEXT: adrp x8, .LCPI16_2
322313
; CHECK-SD-NEXT: add v2.8h, v0.8h, v2.8h
323314
; CHECK-SD-NEXT: add v3.8h, v1.8h, v3.8h
324-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v5.16b
325-
; CHECK-SD-NEXT: and v1.16b, v1.16b, v5.16b
326315
; CHECK-SD-NEXT: sshl v2.8h, v2.8h, v4.8h
327316
; CHECK-SD-NEXT: sshl v3.8h, v3.8h, v4.8h
328-
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_3]
329-
; CHECK-SD-NEXT: and v2.16b, v2.16b, v4.16b
330-
; CHECK-SD-NEXT: and v3.16b, v3.16b, v4.16b
331-
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b
332-
; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b
317+
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
318+
; CHECK-SD-NEXT: bif v0.16b, v2.16b, v4.16b
319+
; CHECK-SD-NEXT: bif v1.16b, v3.16b, v4.16b
333320
; CHECK-SD-NEXT: ret
334321
;
335322
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
@@ -363,42 +350,32 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
363350
define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
364351
; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
365352
; CHECK-SD: // %bb.0:
366-
; CHECK-SD-NEXT: adrp x8, .LCPI17_1
353+
; CHECK-SD-NEXT: adrp x8, .LCPI17_0
367354
; CHECK-SD-NEXT: cmlt v4.8h, v0.8h, #0
368355
; CHECK-SD-NEXT: cmlt v5.8h, v1.8h, #0
369356
; CHECK-SD-NEXT: cmlt v7.8h, v2.8h, #0
370357
; CHECK-SD-NEXT: cmlt v16.8h, v3.8h, #0
371-
; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI17_1]
372-
; CHECK-SD-NEXT: adrp x8, .LCPI17_2
358+
; CHECK-SD-NEXT: ldr q6, [x8, :lo12:.LCPI17_0]
359+
; CHECK-SD-NEXT: adrp x8, .LCPI17_1
373360
; CHECK-SD-NEXT: ushl v4.8h, v4.8h, v6.8h
374361
; CHECK-SD-NEXT: ushl v5.8h, v5.8h, v6.8h
375362
; CHECK-SD-NEXT: ushl v7.8h, v7.8h, v6.8h
376363
; CHECK-SD-NEXT: ushl v6.8h, v16.8h, v6.8h
377-
; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_2]
378-
; CHECK-SD-NEXT: adrp x8, .LCPI17_0
364+
; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_1]
365+
; CHECK-SD-NEXT: adrp x8, .LCPI17_2
379366
; CHECK-SD-NEXT: add v4.8h, v0.8h, v4.8h
380367
; CHECK-SD-NEXT: add v5.8h, v1.8h, v5.8h
381-
; CHECK-SD-NEXT: ldr q17, [x8, :lo12:.LCPI17_0]
382368
; CHECK-SD-NEXT: add v7.8h, v2.8h, v7.8h
383369
; CHECK-SD-NEXT: add v6.8h, v3.8h, v6.8h
384-
; CHECK-SD-NEXT: adrp x8, .LCPI17_3
385-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v17.16b
386-
; CHECK-SD-NEXT: and v1.16b, v1.16b, v17.16b
387-
; CHECK-SD-NEXT: and v2.16b, v2.16b, v17.16b
388370
; CHECK-SD-NEXT: sshl v4.8h, v4.8h, v16.8h
389371
; CHECK-SD-NEXT: sshl v5.8h, v5.8h, v16.8h
390-
; CHECK-SD-NEXT: and v3.16b, v3.16b, v17.16b
391372
; CHECK-SD-NEXT: sshl v7.8h, v7.8h, v16.8h
392373
; CHECK-SD-NEXT: sshl v6.8h, v6.8h, v16.8h
393-
; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_3]
394-
; CHECK-SD-NEXT: and v4.16b, v4.16b, v16.16b
395-
; CHECK-SD-NEXT: and v5.16b, v5.16b, v16.16b
396-
; CHECK-SD-NEXT: and v7.16b, v7.16b, v16.16b
397-
; CHECK-SD-NEXT: and v6.16b, v6.16b, v16.16b
398-
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v4.16b
399-
; CHECK-SD-NEXT: orr v1.16b, v1.16b, v5.16b
400-
; CHECK-SD-NEXT: orr v2.16b, v2.16b, v7.16b
401-
; CHECK-SD-NEXT: orr v3.16b, v3.16b, v6.16b
374+
; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI17_2]
375+
; CHECK-SD-NEXT: bif v0.16b, v4.16b, v16.16b
376+
; CHECK-SD-NEXT: bif v1.16b, v5.16b, v16.16b
377+
; CHECK-SD-NEXT: bif v2.16b, v7.16b, v16.16b
378+
; CHECK-SD-NEXT: bif v3.16b, v6.16b, v16.16b
402379
; CHECK-SD-NEXT: ret
403380
;
404381
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
@@ -904,29 +881,21 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
904881
define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
905882
; CHECK-SD-LABEL: non_splat_minus_one_divisor_1:
906883
; CHECK-SD: // %bb.0:
907-
; CHECK-SD-NEXT: adrp x8, .LCPI26_1
884+
; CHECK-SD-NEXT: adrp x8, .LCPI26_0
908885
; CHECK-SD-NEXT: cmlt v1.16b, v0.16b, #0
909-
; CHECK-SD-NEXT: adrp x9, .LCPI26_3
886+
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
887+
; CHECK-SD-NEXT: adrp x8, .LCPI26_1
888+
; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
910889
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
911890
; CHECK-SD-NEXT: adrp x8, .LCPI26_2
912-
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI26_3]
913-
; CHECK-SD-NEXT: adrp x9, .LCPI26_5
914-
; CHECK-SD-NEXT: ushl v1.16b, v1.16b, v2.16b
915-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_2]
916-
; CHECK-SD-NEXT: adrp x8, .LCPI26_0
917891
; CHECK-SD-NEXT: add v1.16b, v0.16b, v1.16b
918892
; CHECK-SD-NEXT: sshl v1.16b, v1.16b, v2.16b
919-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
920-
; CHECK-SD-NEXT: adrp x8, .LCPI26_4
921-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
922-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_4]
923-
; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
924-
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI26_5]
925-
; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
893+
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_2]
894+
; CHECK-SD-NEXT: adrp x8, .LCPI26_3
895+
; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
896+
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI26_3]
926897
; CHECK-SD-NEXT: neg v1.16b, v0.16b
927-
; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
928-
; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
929-
; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
898+
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
930899
; CHECK-SD-NEXT: ret
931900
;
932901
; CHECK-GI-LABEL: non_splat_minus_one_divisor_1:

0 commit comments

Comments
 (0)