Skip to content

Commit 0a29289

Browse files
committed
Support other shift amounts.
1 parent 39752d3 commit 0a29289

File tree

3 files changed

+68
-69
lines changed

3 files changed

+68
-69
lines changed

llvm/lib/Target/AArch64/AArch64ExpandImm.cpp

Lines changed: 39 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -239,15 +239,18 @@ static bool trySequenceOfOnes(uint64_t UImm,
239239
return true;
240240
}
241241

242-
// Attempt to expand 64-bit immediate values whose negated upper half match
243-
// the lower half (for example, 0x1234'5678'edcb'a987).
244-
// Immediates of this form can generally be expanded via a sequence of
245-
// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate
246-
// the result to the upper half, e.g.:
242+
// Attempt to expand 64-bit immediate values that consist of shifted negated
243+
// components such as 0x1234'5678'edcb'a987, where the upper half is the
244+
// negation of the lower half. Immediates of this form can generally be
245+
// expanded via a sequence of MOVN+MOVK to expand the lower half, followed by
246+
// an EOR or EON to shift and negate the result to the upper half, for example:
247247
// mov x0, #-22137 // =0xffffffffffffa987
248248
// movk x0, #60875, lsl #16 // =0xffffffffedcba987
249249
// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
250250
// =0x12345678edcba987.
251+
// The logic extends to other shift amounts in the range [17, 48) (outside that
252+
// range we get runs of ones/zeros that are optimised separately).
253+
//
251254
// When the lower half contains a 16-bit chunk of ones, such as
252255
// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
253256
// Similarly, when it contains a 16-bit chunk of zeros, such as
@@ -261,35 +264,44 @@ static bool trySequenceOfOnes(uint64_t UImm,
261264
// compared to the default expansion based on MOV and MOVKs.
262265
static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
263266
SmallVectorImpl<ImmInsnModel> &Insn) {
264-
// We need the negation of the upper half of Imm to match the lower half.
265267
// Degenerate cases (zero or a run of ones) should be handled separately.
266-
if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm))
268+
if (!Imm || llvm::isShiftedMask_64(Imm))
267269
return false;
268270

269271
const unsigned Mask = 0xffff;
270-
unsigned Opc = AArch64::EORXrs;
271272

272-
// If we have a chunk of all zeros in the lower half, we can save a MOVK by
273-
// materialising the negated immediate and negating the result with an EON.
274-
if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) {
275-
Opc = AArch64::EONXrs;
276-
Imm = ~Imm;
277-
}
273+
auto tryExpansion = [&](unsigned Opc, uint64_t C, unsigned N) {
274+
assert((C >> 32) == 0xffffffffULL && "Invalid immediate");
275+
const unsigned Imm0 = C & Mask;
276+
const unsigned Imm16 = (C >> 16) & Mask;
277+
if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
278+
return false;
279+
280+
if (Imm0 != Mask) {
281+
Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
282+
if (Imm16 != Mask)
283+
Insn.push_back({AArch64::MOVKXi, Imm16, 16});
284+
} else {
285+
Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
286+
}
278287

279-
unsigned Imm0 = Imm & Mask;
280-
unsigned Imm16 = (Imm >> 16) & Mask;
281-
if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
282-
return false;
283-
if (Imm0 != Mask) {
284-
Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
285-
if (Imm16 != Mask)
286-
Insn.push_back({AArch64::MOVKXi, Imm16, 16});
287-
} else {
288-
Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
288+
Insn.push_back({Opc, 0, N});
289+
return true;
290+
};
291+
292+
for (unsigned N = 17; N < 48; ++N) {
293+
// Attempt EOR: find C such that C ^ (C << N) == Imm.
294+
uint64_t C = 0xffffffff00000000ULL | (Imm ^ (Imm << N));
295+
if ((C ^ (C << N)) == Imm && tryExpansion(AArch64::EORXrs, C, N))
296+
return true;
297+
298+
// Attempt EON: find C such that C ^ ~(C << N) == Imm.
299+
C = 0xffffffff00000000ULL | (Imm ^ ~(~Imm << N));
300+
if ((C ^ ~(C << N)) == Imm && tryExpansion(AArch64::EONXrs, C, N))
301+
return true;
289302
}
290303

291-
Insn.push_back({Opc, 0, 32});
292-
return true;
304+
return false;
293305
}
294306

295307
static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
@@ -698,7 +710,7 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
698710
if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
699711
return;
700712

701-
// Attempt to use a sequence of MOVN+MOVK+EOR (shifted register).
713+
// Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
702714
if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn))
703715
return;
704716

llvm/test/CodeGen/AArch64/arm64-movi.ll

Lines changed: 26 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -557,9 +557,8 @@ define i64 @orr_32_eor_64() nounwind {
557557
define i64 @movn_0_eon_lsl_17() {
558558
; CHECK-LABEL: movn_0_eon_lsl_17:
559559
; CHECK: // %bb.0:
560-
; CHECK-NEXT: mov x0, #-4370 // =0xffffffffffffeeee
561-
; CHECK-NEXT: movk x0, #8738, lsl #16
562-
; CHECK-NEXT: movk x0, #65534, lsl #32
560+
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
561+
; CHECK-NEXT: eon x0, x0, x0, lsl #17
563562
; CHECK-NEXT: ret
564563
ret i64 u0xfffffffe2222eeee
565564
}
@@ -576,19 +575,17 @@ define i64 @movn_0_eon_lsl_32() {
576575
define i64 @movn_0_eon_lsl_47() {
577576
; CHECK-LABEL: movn_0_eon_lsl_47:
578577
; CHECK: // %bb.0:
579-
; CHECK-NEXT: mov x0, #61166 // =0xeeee
580-
; CHECK-NEXT: movk x0, #32768, lsl #32
581-
; CHECK-NEXT: movk x0, #34952, lsl #48
578+
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
579+
; CHECK-NEXT: eon x0, x0, x0, lsl #47
582580
; CHECK-NEXT: ret
583581
ret i64 u0x888880000000eeee
584582
}
585583

586584
define i64 @movn_1_eon_lsl_17() {
587585
; CHECK-LABEL: movn_1_eon_lsl_17:
588586
; CHECK: // %bb.0:
589-
; CHECK-NEXT: mov x0, #286261248 // =0x11100000
590-
; CHECK-NEXT: movk x0, #8739, lsl #32
591-
; CHECK-NEXT: movk x0, #65534, lsl #48
587+
; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff
588+
; CHECK-NEXT: eon x0, x0, x0, lsl #17
592589
; CHECK-NEXT: ret
593590
ret i64 u0xfffe222311100000
594591
}
@@ -605,19 +602,17 @@ define i64 @movn_1_eon_lsl_32() {
605602
define i64 @movn_1_eon_lsl_46() {
606603
; CHECK-LABEL: movn_1_eon_lsl_46:
607604
; CHECK: // %bb.0:
608-
; CHECK-NEXT: mov x0, #4008509440 // =0xeeed0000
609-
; CHECK-NEXT: movk x0, #49152, lsl #32
610-
; CHECK-NEXT: movk x0, #49151, lsl #48
605+
; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff
606+
; CHECK-NEXT: eon x0, x0, x0, lsl #46
611607
; CHECK-NEXT: ret
612608
ret i64 u0xbfffc000eeed0000
613609
}
614610

615611
define i64 @movn_0_eor_lsl_17() {
616612
; CHECK-LABEL: movn_0_eor_lsl_17:
617613
; CHECK: // %bb.0:
618-
; CHECK-NEXT: mov x0, #4369 // =0x1111
619-
; CHECK-NEXT: movk x0, #56797, lsl #16
620-
; CHECK-NEXT: movk x0, #1, lsl #32
614+
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
615+
; CHECK-NEXT: eor x0, x0, x0, lsl #17
621616
; CHECK-NEXT: ret
622617
ret i64 u0x00000001dddd1111
623618
}
@@ -635,18 +630,16 @@ define i64 @movn_0_eor_lsl_47() {
635630
; CHECK-LABEL: movn_0_eor_lsl_47:
636631
; CHECK: // %bb.0:
637632
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
638-
; CHECK-NEXT: movk x0, #32767, lsl #32
639-
; CHECK-NEXT: movk x0, #30583, lsl #48
633+
; CHECK-NEXT: eor x0, x0, x0, lsl #47
640634
; CHECK-NEXT: ret
641635
ret i64 u0x77777fffffff1111
642636
}
643637

644638
define i64 @movn_1_eor_lsl_17() {
645639
; CHECK-LABEL: movn_1_eor_lsl_17:
646640
; CHECK: // %bb.0:
647-
; CHECK-NEXT: mov x0, #-286261249 // =0xffffffffeeefffff
648-
; CHECK-NEXT: movk x0, #56796, lsl #32
649-
; CHECK-NEXT: movk x0, #1, lsl #48
641+
; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff
642+
; CHECK-NEXT: eor x0, x0, x0, lsl #17
650643
; CHECK-NEXT: ret
651644
ret i64 u0x0001dddceeefffff
652645
}
@@ -663,42 +656,38 @@ define i64 @movn_1_eor_lsl_32() {
663656
define i64 @movn_1_eor_lsl_46() {
664657
; CHECK-LABEL: movn_1_eor_lsl_46:
665658
; CHECK: // %bb.0:
666-
; CHECK-NEXT: mov x0, #-4008509441
667-
; CHECK-NEXT: movk x0, #16383, lsl #32
668-
; CHECK-NEXT: movk x0, #16384, lsl #48
659+
; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff
660+
; CHECK-NEXT: eor x0, x0, x0, lsl #46
669661
; CHECK-NEXT: ret
670662
ret i64 u0x40003fff1112ffff
671663
}
672664

673665
define i64 @movn_movk_eon_lsl_17() {
674666
; CHECK-LABEL: movn_movk_eon_lsl_17:
675667
; CHECK: // %bb.0:
676-
; CHECK-NEXT: mov x0, #43399 // =0xa987
677-
; CHECK-NEXT: movk x0, #16699, lsl #16
678-
; CHECK-NEXT: movk x0, #9320, lsl #32
679-
; CHECK-NEXT: movk x0, #65534, lsl #48
668+
; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678
669+
; CHECK-NEXT: movk x0, #4660, lsl #16
670+
; CHECK-NEXT: eon x0, x0, x0, lsl #17
680671
; CHECK-NEXT: ret
681672
ret i64 u0xfffe2468413ba987
682673
}
683674

684675
define i64 @movn_movk_eon_lsl_47() {
685676
; CHECK-LABEL: movn_movk_eon_lsl_47:
686677
; CHECK: // %bb.0:
687-
; CHECK-NEXT: mov x0, #61166 // =0xeeee
688-
; CHECK-NEXT: movk x0, #21554, lsl #16
689-
; CHECK-NEXT: movk x0, #32768, lsl #32
690-
; CHECK-NEXT: movk x0, #34952, lsl #48
678+
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
679+
; CHECK-NEXT: movk x0, #43981, lsl #16
680+
; CHECK-NEXT: eon x0, x0, x0, lsl #47
691681
; CHECK-NEXT: ret
692682
ret i64 u0x888880005432eeee
693683
}
694684

695685
define i64 @movn_movk_eor_lsl_17() {
696686
; CHECK-LABEL: movn_movk_eor_lsl_17:
697687
; CHECK: // %bb.0:
698-
; CHECK-NEXT: mov x0, #22136 // =0x5678
699-
; CHECK-NEXT: movk x0, #48836, lsl #16
700-
; CHECK-NEXT: movk x0, #56215, lsl #32
701-
; CHECK-NEXT: movk x0, #1, lsl #48
688+
; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678
689+
; CHECK-NEXT: movk x0, #4660, lsl #16
690+
; CHECK-NEXT: eor x0, x0, x0, lsl #17
702691
; CHECK-NEXT: ret
703692
ret i64 u0x0001db97bec45678
704693
}
@@ -716,10 +705,9 @@ define i64 @movn_movk_eor_lsl_32() {
716705
define i64 @movn_movk_eor_lsl_47() {
717706
; CHECK-LABEL: movn_movk_eor_lsl_47:
718707
; CHECK: // %bb.0:
719-
; CHECK-NEXT: mov x0, #4369 // =0x1111
708+
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
720709
; CHECK-NEXT: movk x0, #43981, lsl #16
721-
; CHECK-NEXT: movk x0, #32767, lsl #32
722-
; CHECK-NEXT: movk x0, #30583, lsl #48
710+
; CHECK-NEXT: eor x0, x0, x0, lsl #47
723711
; CHECK-NEXT: ret
724712
ret i64 u0x77777fffabcd1111
725713
}

llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -61,14 +61,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
6161
; CHECK-NEXT: sbfx x8, x0, #0, #33
6262
; CHECK-NEXT: sbfx x10, x1, #0, #33
6363
; CHECK-NEXT: movk x9, #29127, lsl #16
64-
; CHECK-NEXT: mov x13, #7281 // =0x1c71
64+
; CHECK-NEXT: mov x13, #-7282 // =0xffffffffffffe38e
6565
; CHECK-NEXT: sbfx x12, x2, #0, #33
6666
; CHECK-NEXT: movk x9, #50972, lsl #32
67-
; CHECK-NEXT: movk x13, #29127, lsl #16
67+
; CHECK-NEXT: movk x13, #36408, lsl #16
6868
; CHECK-NEXT: movk x9, #7281, lsl #48
69-
; CHECK-NEXT: movk x13, #50972, lsl #32
69+
; CHECK-NEXT: eon x13, x13, x13, lsl #33
7070
; CHECK-NEXT: smulh x11, x8, x9
71-
; CHECK-NEXT: movk x13, #7281, lsl #48
7271
; CHECK-NEXT: smulh x9, x10, x9
7372
; CHECK-NEXT: smulh x13, x12, x13
7473
; CHECK-NEXT: add x11, x11, x11, lsr #63

0 commit comments

Comments
 (0)