Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,59 @@ static bool trySequenceOfOnes(uint64_t UImm,
return true;
}

// Attempt to expand 64-bit immediate values whose negated upper half matches
// the lower half (for example, 0x1234'5678'edcb'a987).
// Immediates of this form can generally be expanded via a sequence of
// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate
// the result to the upper half, e.g.:
// mov x0, #-22137 // =0xffffffffffffa987
// movk x0, #60875, lsl #16 // =0xffffffffedcba987
// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
// =0x12345678edcba987.
// When the lower half contains a 16-bit chunk of ones, such as
// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
// Similarly, when it contains a 16-bit chunk of zeros, such as
// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
// the negation of the lower half and negating the result with an EON, e.g.:
// mov x0, #-43400 // =0xffffffffffff5678
// eon x0, x0, x0, lsl #32 // =0xffffffffffff5678 ^ ~0xffff567800000000
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to do this with shift amounts other than 32? Would it overlap with some other pattern, or is it just more complicated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be useful with shift amounts in $[17, 48)$ (outside that range we get runs of ones/zeros that are already optimised). I don't have practical examples to motivate supporting these cases, but I'm happy to extend the implementation if you think they are worth supporting too?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's not too hard, sure? I don't want to spend too much time on extending this code in ways that don't actually help in practice, though; if you're going to spend more time on this code, it's probably better to spend it on constants from real code. So it would also be fine to just note the future extension in a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the delay, I didn't manage to get back to this last week. I've extended the code to support shift amounts other than 32. Given the new code structure, it might also be worth adding support for patterns with BIC (they overlap somewhat with EOR, but some instances are unique). If you'd like I'm happy to do so in this PR, otherwise we can revisit this once the PR is merged.

Please let me know what you think. :)

// =0xffffffffffff5678 ^ 0x0000a987ffffffff
// =0xffff56780000a987.
// In any of these cases, the expansion with EOR/EON saves an instruction
// compared to the default expansion based on MOV and MOVKs.
// Only a shift amount of 32 is handled here. When \p AllowThreeSequence is
// false, only the two-instruction forms (MOVN + EOR/EON) are accepted. On
// success the instructions are appended to \p Insn and true is returned; on
// failure \p Insn is left untouched and false is returned.
static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
                                SmallVectorImpl<ImmInsnModel> &Insn) {
  // The lower 32 bits must be the bitwise complement of the upper 32 bits.
  const uint64_t LowHalf = Imm & 0xffffffffULL;
  const uint64_t NegatedHighHalf = ~Imm >> 32;
  if (NegatedHighHalf != LowHalf)
    return false;

  // A single contiguous run of ones is a degenerate case that is expected to
  // be handled by a different expansion.
  if (llvm::isShiftedMask_64(Imm))
    return false;

  const unsigned ChunkMask = 0xffff;

  // An all-zero 16-bit chunk in the lower half turns into an all-one chunk
  // once the immediate is complemented, which saves the MOVK. In that case
  // materialise ~Imm and undo the complement by combining with EON instead
  // of EOR.
  const bool LowHalfHasZeroChunk =
      (Imm & ChunkMask) == 0 || ((Imm >> 16) & ChunkMask) == 0;
  const unsigned CombineOpc =
      LowHalfHasZeroChunk ? AArch64::EONXrs : AArch64::EORXrs;
  const uint64_t Target = LowHalfHasZeroChunk ? ~Imm : Imm;

  const unsigned Chunk0 = Target & ChunkMask;
  const unsigned Chunk1 = (Target >> 16) & ChunkMask;
  const bool Chunk0IsOnes = Chunk0 == ChunkMask;
  const bool Chunk1IsOnes = Chunk1 == ChunkMask;

  // If neither chunk is all ones, a MOVK is needed on top of the MOVN, which
  // makes this a three-instruction sequence.
  if (!Chunk0IsOnes && !Chunk1IsOnes && !AllowThreeSequence)
    return false;

  if (Chunk0IsOnes) {
    // Bits [15:0] are already ones; a single MOVN at shift 16 sets chunk 1.
    Insn.push_back({AArch64::MOVNXi, Chunk1 ^ ChunkMask, 16});
  } else {
    Insn.push_back({AArch64::MOVNXi, Chunk0 ^ ChunkMask, 0});
    if (!Chunk1IsOnes)
      Insn.push_back({AArch64::MOVKXi, Chunk1, 16});
  }

  // Combine the register with itself shifted left by 32 to form the upper
  // half.
  Insn.push_back({CombineOpc, 0, 32});
  return true;
}

static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
uint64_t NumOnes = llvm::countr_one(V >> StartPosition);

Expand Down Expand Up @@ -614,6 +667,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (tryEorOfLogicalImmediates(UImm, Insn))
return;

// Attempt to use a sequence of MOVN+EOR/EON (shifted register).
if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/false, Insn))
return;

// FIXME: Add more two-instruction sequences.

// Three instruction sequences.
Expand Down Expand Up @@ -641,6 +698,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
return;

// Attempt to use a sequence of MOVN+MOVK+EOR (shifted register).
if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn))
return;

// We found no possible two or three instruction sequence; use the general
// four-instruction sequence.
expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
.addImm(I->Op2));
}
break;
case AArch64::EONXrs:
case AArch64::EORXrs:
case AArch64::ORRWrs:
case AArch64::ORRXrs: {
Register DstReg = MI.getOperand(0).getReg();
Expand Down
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-movi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,53 @@ define i64 @orr_32_eor_64() nounwind {
; CHECK-NEXT: ret
ret i64 18446604367017541391
}

;==--------------------------------------------------------------------------==
; Tests for EOR / EON with MOVN.
;==--------------------------------------------------------------------------==

; 0xffff55550000aaaa: the upper 32 bits are the complement of the lower 32
; bits, and bits [31:16] of the constant are zero. The expected output is one
; mov materialising 0xffffffffffff5555 followed by an eon of the register with
; itself shifted left by 32.
define i64 @movn_0_eon() {
; CHECK-LABEL: movn_0_eon:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: eon x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0xffff55550000aaaa
}

; 0x5555ffffaaaa0000: the upper 32 bits are the complement of the lower 32
; bits, and bits [15:0] of the constant are zero. The expected output is one
; mov materialising 0xffffffff5555ffff followed by an eon of the register with
; itself shifted left by 32.
define i64 @movn_1_eon() {
; CHECK-LABEL: movn_1_eon:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
; CHECK-NEXT: eon x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x5555ffffaaaa0000
}

; 0x0000aaaaffff5555: the upper 32 bits are the complement of the lower 32
; bits, and bits [31:16] of the constant are all ones. The expected output is
; one mov materialising 0xffffffffffff5555 followed by an eor of the register
; with itself shifted left by 32.
define i64 @movn_0_eor() {
; CHECK-LABEL: movn_0_eor:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x0000aaaaffff5555
}

; 0xaaaa00005555ffff: the upper 32 bits are the complement of the lower 32
; bits, and bits [15:0] of the constant are all ones. The expected output is
; one mov materialising 0xffffffff5555ffff followed by an eor of the register
; with itself shifted left by 32.
define i64 @movn_1_eor() {
; CHECK-LABEL: movn_1_eor:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0xaaaa00005555ffff
}

; 0x33335555ccccaaaa: the upper 32 bits are the complement of the lower 32
; bits, and neither 16-bit chunk of the lower half is all zeros or all ones.
; The expected output is a three-instruction sequence: mov, movk into bits
; [31:16], then an eor of the register with itself shifted left by 32.
define i64 @movn_movk_eor() {
; CHECK-LABEL: movn_movk_eor:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa
; CHECK-NEXT: movk x0, #52428, lsl #16
; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x33335555ccccaaaa
}