Skip to content

Commit 151b440

Browse files
committed
[AArch64] Improve expansion of immediates of the form (~w << 32 | w).
When one half of a 64-bit immediate corresponds to the negation of the other half, we can use a sequence of MOVN, MOVK and EOR to expand the bottom half of the immediate and replicate its negation to the top half. In the general case, this saves us a MOVK compared to expanding the immediate explicitly. As a refinement, when the bottom half contains a 16-bit chunk of ones, the intermediate MOVK can be omitted. Similarly, when the bottom half contains a chunk of zeros, we can alternatively expand its negation and use an EON to reconstruct the expected result. In either case, this still saves us a MOVK compared to the default expansion.
1 parent 3056f39 commit 151b440

File tree

3 files changed

+69
-16
lines changed

3 files changed

+69
-16
lines changed

llvm/lib/Target/AArch64/AArch64ExpandImm.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,57 @@ static bool trySequenceOfOnes(uint64_t UImm,
239239
return true;
240240
}
241241

242+
// Attempt to expand 64-bit immediate values whose negated upper half match
243+
// the lower half (for example, 0x1234'5678'edcb'a987).
244+
// Immediates of this form can generally be expanded via a sequence of
245+
// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate
246+
// the result to the upper half, e.g.:
247+
// mov x0, #-22137 // =0xffffffffffffa987
248+
// movk x0, #60875, lsl #16 // =0xffffffffedcba987
249+
// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
250+
// =0x12345678edcba987.
251+
// When the lower half contains a 16-bit chunk of ones, such as
252+
// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
253+
// Similarly, when it contains a 16-bit chunk of zeros, such as
254+
// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
255+
// the negation of the lower half and negating the result with an EON, e.g.:
256+
// mov x0, #-43400 // =0xffffffffffff5678
257+
// eon x0, x0, x0, lsl #32 // =0xffffffffffff5678 ^ ~0xffff567800000000
258+
// =0xffffffffffff5678 ^ 0x0000a987ffffffff
259+
// =0xffff56780000a987.
260+
// In any of these cases, the expansion with EOR/EON saves an instruction
261+
// compared to the default expansion based on MOV and MOVKs.
262+
static bool tryCopyWithNegation(uint64_t Imm,
263+
SmallVectorImpl<ImmInsnModel> &Insn) {
264+
// We need the negation of the upper half of Imm to match the lower half.
265+
// Degenerate cases where Imm is a run of ones should be handled separately.
266+
if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm))
267+
return false;
268+
269+
const unsigned Mask = 0xffff;
270+
unsigned Opc = AArch64::EORXrs;
271+
272+
// If we have a chunk of all zeros in the lower half, we can save a MOVK by
273+
// materialising the negated immediate and negating the result with an EON.
274+
if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) {
275+
Opc = AArch64::EONXrs;
276+
Imm = ~Imm;
277+
}
278+
279+
unsigned Imm0 = Imm & Mask;
280+
unsigned Imm16 = (Imm >> 16) & Mask;
281+
if (Imm0 != Mask) {
282+
Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
283+
if (Imm16 != Mask)
284+
Insn.push_back({AArch64::MOVKXi, Imm16, 16});
285+
} else {
286+
Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
287+
}
288+
289+
Insn.push_back({Opc, 0, 32});
290+
return true;
291+
}
292+
242293
static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
243294
uint64_t NumOnes = llvm::countr_one(V >> StartPosition);
244295

@@ -617,7 +668,12 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
617668
// FIXME: Add more two-instruction sequences.
618669

619670
// Three instruction sequences.
620-
//
671+
672+
// Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
673+
// The MOVK can be avoided if Imm contains a zero / one chunk.
674+
if (tryCopyWithNegation(Imm, Insn))
675+
return;
676+
621677
// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
622678
// the fastest sequence with fast literal generation. (If neither MOVK is
623679
// part of a fast literal generation pair, it could be slower than the

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
169169
.addImm(I->Op2));
170170
}
171171
break;
172+
case AArch64::EONXrs:
173+
case AArch64::EORXrs:
172174
case AArch64::ORRWrs:
173175
case AArch64::ORRXrs: {
174176
Register DstReg = MI.getOperand(0).getReg();

llvm/test/CodeGen/AArch64/arm64-movi.ll

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -557,50 +557,45 @@ define i64 @orr_32_eor_64() nounwind {
557557
define i64 @movn_0_eon() {
558558
; CHECK-LABEL: movn_0_eon:
559559
; CHECK: // %bb.0:
560-
; CHECK-NEXT: mov x0, #43690 // =0xaaaa
561-
; CHECK-NEXT: movk x0, #21845, lsl #32
562-
; CHECK-NEXT: movk x0, #65535, lsl #48
560+
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
561+
; CHECK-NEXT: eon x0, x0, x0, lsl #32
563562
; CHECK-NEXT: ret
564563
ret i64 u0xffff55550000aaaa
565564
}
566565

567566
define i64 @movn_1_eon() {
568567
; CHECK-LABEL: movn_1_eon:
569568
; CHECK: // %bb.0:
570-
; CHECK-NEXT: mov x0, #2863267840 // =0xaaaa0000
571-
; CHECK-NEXT: movk x0, #65535, lsl #32
572-
; CHECK-NEXT: movk x0, #21845, lsl #48
569+
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
570+
; CHECK-NEXT: eon x0, x0, x0, lsl #32
573571
; CHECK-NEXT: ret
574572
ret i64 u0x5555ffffaaaa0000
575573
}
576574

577575
define i64 @movn_0_eor() {
578576
; CHECK-LABEL: movn_0_eor:
579577
; CHECK: // %bb.0:
580-
; CHECK-NEXT: mov x0, #21845 // =0x5555
581-
; CHECK-NEXT: movk x0, #65535, lsl #16
582-
; CHECK-NEXT: movk x0, #43690, lsl #32
578+
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
579+
; CHECK-NEXT: eor x0, x0, x0, lsl #32
583580
; CHECK-NEXT: ret
584581
ret i64 u0x0000aaaaffff5555
585582
}
586583

587584
define i64 @movn_1_eor() {
588585
; CHECK-LABEL: movn_1_eor:
589586
; CHECK: // %bb.0:
590-
; CHECK-NEXT: mov x0, #65535 // =0xffff
591-
; CHECK-NEXT: movk x0, #21845, lsl #16
592-
; CHECK-NEXT: movk x0, #43690, lsl #48
587+
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
588+
; CHECK-NEXT: eor x0, x0, x0, lsl #32
593589
; CHECK-NEXT: ret
594590
ret i64 u0xaaaa00005555ffff
595591
}
596592

597593
define i64 @movn_movk_eor() {
598594
; CHECK-LABEL: movn_movk_eor:
599595
; CHECK: // %bb.0:
600-
; CHECK-NEXT: mov x0, #43690 // =0xaaaa
596+
; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa
601597
; CHECK-NEXT: movk x0, #52428, lsl #16
602-
; CHECK-NEXT: movk x0, #21845, lsl #32
603-
; CHECK-NEXT: movk x0, #13107, lsl #48
598+
; CHECK-NEXT: eor x0, x0, x0, lsl #32
604599
; CHECK-NEXT: ret
605600
ret i64 u0x33335555ccccaaaa
606601
}

0 commit comments

Comments
 (0)