Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,71 @@ static bool trySequenceOfOnes(uint64_t UImm,
return true;
}

// Attempt to expand 64-bit immediate values that consist of shifted negated
// components such as 0x1234'5678'edcb'a987, where the upper half is the
// negation of the lower half. Immediates of this form can generally be
// expanded via a sequence of MOVN+MOVK to expand the lower half, followed by
// an EOR or EON to shift and negate the result to the upper half, for example:
// mov x0, #-22137 // =0xffffffffffffa987
// movk x0, #60875, lsl #16 // =0xffffffffedcba987
// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
// =0x12345678edcba987.
// The logic extends to other shift amounts in the range [17, 48) (outside that
// range we get runs of ones/zeros that are optimised separately).
//
// When the lower half contains a 16-bit chunk of ones, such as
// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
// Similarly, when it contains a 16-bit chunk of zeros, such as
// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
// the negation of the lower half and negating the result with an EON, e.g.:
// mov x0, #-43400 // =0xffffffffffff5678
// eon x0, x0, x0, lsl #32 // =0xffffffffffff5678 ^ ~0xffff567800000000
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to do this with shift amounts other than 32? Would it overlap with some other pattern, or is it just more complicated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be useful with shift amounts in $[17, 48)$ (outside that range we get runs of ones/zeros that are already optimised). I don't have practical examples to motivate supporting these cases, but I'm happy to extend the implementation if you think they are worth supporting too?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's not too hard, sure. That said, I don't want to spend too much time extending this code in ways that don't actually help in practice; if you're going to spend more time on this code, it's probably better to spend it on constants from real code. So it would also be fine to just note the future extension in a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the delay, I didn't manage to get back to this last week. I've extended the code to support shift amounts other than 32. Given the new code structure, it might also be worth adding support for patterns with BIC (they overlap somewhat with EOR, but some instances are unique). If you'd like I'm happy to do so in this PR, otherwise we can revisit this once the PR is merged.

Please let me know what you think. :)

// =0xffffffffffff5678 ^ 0x0000a987ffffffff
// =0xffff56780000a987.
// In any of these cases, the expansion with EOR/EON saves an instruction
// compared to the default expansion based on MOV and MOVKs.
static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
                                SmallVectorImpl<ImmInsnModel> &Insn) {
  // Zero and single contiguous runs of ones are degenerate inputs for this
  // pattern; they are left to the other expansions.
  if (Imm == 0 || llvm::isShiftedMask_64(Imm))
    return false;

  const unsigned ChunkMask = 0xffff;
  const uint64_t UpperOnes = 0xffffffff00000000ULL;

  // Materialise Seed (whose upper 32 bits are all ones) with MOVN, plus a
  // MOVK when neither of the two low 16-bit chunks is all ones, and then
  // append the shifted-register instruction Opc with shift amount Shift.
  // Nothing is appended when the MOVK would be required but three-instruction
  // sequences are not allowed.
  auto emitSequence = [&](unsigned Opc, uint64_t Seed,
                          unsigned Shift) -> bool {
    assert((Seed >> 32) == 0xffffffffULL && "Invalid immediate");
    const unsigned Chunk0 = Seed & ChunkMask;
    const unsigned Chunk16 = (Seed >> 16) & ChunkMask;
    const bool NeedsMovk = Chunk0 != ChunkMask && Chunk16 != ChunkMask;
    if (NeedsMovk && !AllowThreeSequence)
      return false;

    if (Chunk0 == ChunkMask) {
      // Bits [0, 16) are already ones: a single MOVN on bits [16, 32).
      Insn.push_back({AArch64::MOVNXi, Chunk16 ^ ChunkMask, 16});
    } else {
      Insn.push_back({AArch64::MOVNXi, Chunk0 ^ ChunkMask, 0});
      if (Chunk16 != ChunkMask)
        Insn.push_back({AArch64::MOVKXi, Chunk16, 16});
    }

    Insn.push_back({Opc, 0, Shift});
    return true;
  };

  for (unsigned Shift = 17; Shift != 48; ++Shift) {
    // EOR: look for a seed such that Seed ^ (Seed << Shift) == Imm.
    const uint64_t EorSeed = UpperOnes | (Imm ^ (Imm << Shift));
    const bool EorMatches = (EorSeed ^ (EorSeed << Shift)) == Imm;
    if (EorMatches && emitSequence(AArch64::EORXrs, EorSeed, Shift))
      return true;

    // EON: look for a seed such that Seed ^ ~(Seed << Shift) == Imm.
    const uint64_t EonSeed = UpperOnes | (Imm ^ ~(~Imm << Shift));
    const bool EonMatches = (EonSeed ^ ~(EonSeed << Shift)) == Imm;
    if (EonMatches && emitSequence(AArch64::EONXrs, EonSeed, Shift))
      return true;
  }

  return false;
}

static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
uint64_t NumOnes = llvm::countr_one(V >> StartPosition);

Expand Down Expand Up @@ -614,6 +679,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (tryEorOfLogicalImmediates(UImm, Insn))
return;

// Attempt to use a sequence of MOVN+EOR/EON (shifted register).
if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/false, Insn))
return;

// FIXME: Add more two-instruction sequences.

// Three instruction sequences.
Expand Down Expand Up @@ -641,6 +710,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
return;

// Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn))
return;

// We found no possible two or three instruction sequence; use the general
// four-instruction sequence.
expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
.addImm(I->Op2));
}
break;
case AArch64::EONXrs:
case AArch64::EORXrs:
case AArch64::ORRWrs:
case AArch64::ORRXrs: {
Register DstReg = MI.getOperand(0).getReg();
Expand Down
162 changes: 162 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-movi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,165 @@ define i64 @orr_32_eor_64() nounwind {
; CHECK-NEXT: ret
ret i64 18446604367017541391
}

;==--------------------------------------------------------------------------==
; Tests for EOR / EON with MOVN.
;==--------------------------------------------------------------------------==

; MOVN seed 0xffffffffffff1111, then EON with lsl #17:
; 0xffffffffffff1111 ^ ~(0xffffffffffff1111 << 17) = 0xfffffffe2222eeee.
define i64 @movn_0_eon_lsl_17() {
; CHECK-LABEL: movn_0_eon_lsl_17:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: eon x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0xfffffffe2222eeee
}

; MOVN seed 0xffffffffffff5555, then EON with lsl #32:
; 0xffffffffffff5555 ^ ~(0xffffffffffff5555 << 32) = 0xffff55550000aaaa.
define i64 @movn_0_eon_lsl_32() {
; CHECK-LABEL: movn_0_eon_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: eon x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0xffff55550000aaaa
}

; MOVN seed 0xffffffffffff1111, then EON with lsl #47:
; 0xffffffffffff1111 ^ ~(0xffffffffffff1111 << 47) = 0x888880000000eeee.
define i64 @movn_0_eon_lsl_47() {
; CHECK-LABEL: movn_0_eon_lsl_47:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: eon x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x888880000000eeee
}

; MOVN seed 0xffffffff1111ffff (only bits 16-31 are not all ones), then EON
; with lsl #17:
; 0xffffffff1111ffff ^ ~(0xffffffff1111ffff << 17) = 0xfffe222311100000.
define i64 @movn_1_eon_lsl_17() {
; CHECK-LABEL: movn_1_eon_lsl_17:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff
; CHECK-NEXT: eon x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0xfffe222311100000
}

; MOVN seed 0xffffffff5555ffff (only bits 16-31 are not all ones), then EON
; with lsl #32:
; 0xffffffff5555ffff ^ ~(0xffffffff5555ffff << 32) = 0x5555ffffaaaa0000.
define i64 @movn_1_eon_lsl_32() {
; CHECK-LABEL: movn_1_eon_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
; CHECK-NEXT: eon x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x5555ffffaaaa0000
}

; MOVN seed 0xffffffff1112ffff (only bits 16-31 are not all ones), then EON
; with lsl #46:
; 0xffffffff1112ffff ^ ~(0xffffffff1112ffff << 46) = 0xbfffc000eeed0000.
define i64 @movn_1_eon_lsl_46() {
; CHECK-LABEL: movn_1_eon_lsl_46:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff
; CHECK-NEXT: eon x0, x0, x0, lsl #46
; CHECK-NEXT: ret
ret i64 u0xbfffc000eeed0000
}

; MOVN seed 0xffffffffffff1111, then EOR with lsl #17:
; 0xffffffffffff1111 ^ (0xffffffffffff1111 << 17) = 0x00000001dddd1111.
define i64 @movn_0_eor_lsl_17() {
; CHECK-LABEL: movn_0_eor_lsl_17:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: eor x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0x00000001dddd1111
}

; MOVN seed 0xffffffffffff5555, then EOR with lsl #32:
; 0xffffffffffff5555 ^ (0xffffffffffff5555 << 32) = 0x0000aaaaffff5555.
define i64 @movn_0_eor_lsl_32() {
; CHECK-LABEL: movn_0_eor_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x0000aaaaffff5555
}

; MOVN seed 0xffffffffffff1111, then EOR with lsl #47:
; 0xffffffffffff1111 ^ (0xffffffffffff1111 << 47) = 0x77777fffffff1111.
define i64 @movn_0_eor_lsl_47() {
; CHECK-LABEL: movn_0_eor_lsl_47:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: eor x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x77777fffffff1111
}

; MOVN seed 0xffffffff1111ffff (only bits 16-31 are not all ones), then EOR
; with lsl #17:
; 0xffffffff1111ffff ^ (0xffffffff1111ffff << 17) = 0x0001dddceeefffff.
define i64 @movn_1_eor_lsl_17() {
; CHECK-LABEL: movn_1_eor_lsl_17:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff
; CHECK-NEXT: eor x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0x0001dddceeefffff
}

; MOVN seed 0xffffffff5555ffff (only bits 16-31 are not all ones), then EOR
; with lsl #32:
; 0xffffffff5555ffff ^ (0xffffffff5555ffff << 32) = 0xaaaa00005555ffff.
define i64 @movn_1_eor_lsl_32() {
; CHECK-LABEL: movn_1_eor_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0xaaaa00005555ffff
}

; MOVN seed 0xffffffff1112ffff (only bits 16-31 are not all ones), then EOR
; with lsl #46:
; 0xffffffff1112ffff ^ (0xffffffff1112ffff << 46) = 0x40003fff1112ffff.
define i64 @movn_1_eor_lsl_46() {
; CHECK-LABEL: movn_1_eor_lsl_46:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff
; CHECK-NEXT: eor x0, x0, x0, lsl #46
; CHECK-NEXT: ret
ret i64 u0x40003fff1112ffff
}

; MOVN+MOVK build the seed 0xffffffff12345678, then EON with lsl #17:
; 0xffffffff12345678 ^ ~(0xffffffff12345678 << 17) = 0xfffe2468413ba987.
define i64 @movn_movk_eon_lsl_17() {
; CHECK-LABEL: movn_movk_eon_lsl_17:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678
; CHECK-NEXT: movk x0, #4660, lsl #16
; CHECK-NEXT: eon x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0xfffe2468413ba987
}

; MOVN+MOVK build the seed 0xffffffffabcd1111, then EON with lsl #47:
; 0xffffffffabcd1111 ^ ~(0xffffffffabcd1111 << 47) = 0x888880005432eeee.
define i64 @movn_movk_eon_lsl_47() {
; CHECK-LABEL: movn_movk_eon_lsl_47:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: movk x0, #43981, lsl #16
; CHECK-NEXT: eon x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x888880005432eeee
}

; MOVN+MOVK build the seed 0xffffffff12345678, then EOR with lsl #17:
; 0xffffffff12345678 ^ (0xffffffff12345678 << 17) = 0x0001db97bec45678.
define i64 @movn_movk_eor_lsl_17() {
; CHECK-LABEL: movn_movk_eor_lsl_17:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678
; CHECK-NEXT: movk x0, #4660, lsl #16
; CHECK-NEXT: eor x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0x0001db97bec45678
}

; MOVN+MOVK build the seed 0xffffffffccccaaaa, then EOR with lsl #32:
; 0xffffffffccccaaaa ^ (0xffffffffccccaaaa << 32) = 0x33335555ccccaaaa.
define i64 @movn_movk_eor_lsl_32() {
; CHECK-LABEL: movn_movk_eor_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa
; CHECK-NEXT: movk x0, #52428, lsl #16
; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x33335555ccccaaaa
}

; MOVN+MOVK build the seed 0xffffffffabcd1111, then EOR with lsl #47:
; 0xffffffffabcd1111 ^ (0xffffffffabcd1111 << 47) = 0x77777fffabcd1111.
define i64 @movn_movk_eor_lsl_47() {
; CHECK-LABEL: movn_movk_eor_lsl_47:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: movk x0, #43981, lsl #16
; CHECK-NEXT: eor x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x77777fffabcd1111
}
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; CHECK-NEXT: sbfx x8, x0, #0, #33
; CHECK-NEXT: sbfx x10, x1, #0, #33
; CHECK-NEXT: movk x9, #29127, lsl #16
; CHECK-NEXT: mov x13, #7281 // =0x1c71
; CHECK-NEXT: mov x13, #-7282 // =0xffffffffffffe38e
; CHECK-NEXT: sbfx x12, x2, #0, #33
; CHECK-NEXT: movk x9, #50972, lsl #32
; CHECK-NEXT: movk x13, #29127, lsl #16
; CHECK-NEXT: movk x13, #36408, lsl #16
; CHECK-NEXT: movk x9, #7281, lsl #48
; CHECK-NEXT: movk x13, #50972, lsl #32
; CHECK-NEXT: eon x13, x13, x13, lsl #33
; CHECK-NEXT: smulh x11, x8, x9
; CHECK-NEXT: movk x13, #7281, lsl #48
; CHECK-NEXT: smulh x9, x10, x9
; CHECK-NEXT: smulh x13, x12, x13
; CHECK-NEXT: add x11, x11, x11, lsr #63
Expand Down