10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -300,6 +300,16 @@ class AArch64TargetLowering : public TargetLowering {
   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                          CombineLevel Level) const override;
 
+  /// Return true if it is profitable to fold a mask into a variable shift pair.
+  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+    EVT VT = Y.getValueType();
+
+    if (VT.isVector())
+      return false;
+
+    return VT.getScalarSizeInBits() <= 64;
+  }
+
   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                             unsigned SelectOpcode, SDValue X,
                                             SDValue Y) const override;
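For context: `shouldFoldMaskToVariableShiftPair` arbitrates between the two canonical ways of clearing the low bits of a value: the single-mask form that the IR canonically uses (`x & (-1 << y)`, as in the tests below), and the variable-shift-pair form this override now prefers for scalars up to 64 bits. A minimal C++ model of the two forms (illustrative only; the function names are not from the patch):

```cpp
#include <cstdint>

// Two equivalent ways to clear the low Y bits of X, for Y in [0, 31].
// The hook returning true tells DAGCombine to prefer the shift pair.
uint32_t maskForm(uint32_t X, uint32_t Y) {
  return X & (UINT32_MAX << Y); // needs -1 materialized in a register
}

uint32_t shiftPairForm(uint32_t X, uint32_t Y) {
  return (X >> Y) << Y;         // two register-shift instructions, no constant
}
```

On AArch64 the shift pair wins because `lsr`/`lsl` take the shift amount from a register, while the mask form costs a `mov #-1`, an `lsl`, and an `and`, plus a scratch register, as the new tests below show.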
10 changes: 10 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.h
@@ -775,6 +775,16 @@ class VectorType;
     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                            CombineLevel Level) const override;
 
+    /// Return true if it is profitable to fold a mask into a variable shift pair.
+    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
+      EVT VT = Y.getValueType();
+
+      if (VT.isVector())
+        return false;
+
+      return VT.getScalarSizeInBits() <= 32;
+    }
+
     bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                               unsigned SelectOpcode, SDValue X,
                                               SDValue Y) const override;
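The ARM override has the same shape as the AArch64 one (see the sketch above) but caps the width at 32 bits: i64 is not a legal type on 32-bit ARM, so a 64-bit variable shift pair would typically be expanded into a long multi-instruction sequence, leaving the single-mask form the cheaper lowering there. The X86 change below encodes the same reasoning as a cap at the native register width.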
7 changes: 2 additions & 5 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3659,11 +3659,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
   if (VT.isVector())
     return false;
 
-  // 64-bit shifts on 32-bit targets produce really bad bloated code.
-  if (VT == MVT::i64 && !Subtarget.is64Bit())
-    return false;
-
-  return true;
+  unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32;
+  return VT.getScalarSizeInBits() <= MaxWidth;
 }
 
 TargetLowering::ShiftLegalizationStrategy
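The X86 change is a refactor plus a tightening: the old code only rejected i64 on 32-bit targets, while the new form caps the width at the native GPR size, which also rejects types wider than 64 bits before legalization. A standalone sanity check of the two predicates (illustrative; `oldPredicate` and `newPredicate` are hypothetical names, not patch code):

```cpp
#include <cassert>

// Old behavior: reject only i64 when the target is not 64-bit.
bool oldPredicate(unsigned Bits, bool Is64Bit) {
  return !(Bits == 64 && !Is64Bit);
}

// New behavior: allow anything up to the native register width.
bool newPredicate(unsigned Bits, bool Is64Bit) {
  unsigned MaxWidth = Is64Bit ? 64 : 32;
  return Bits <= MaxWidth;
}

int main() {
  // The predicates agree on all scalar widths that are legal on x86...
  for (unsigned Bits : {8u, 16u, 32u, 64u})
    for (bool Is64Bit : {false, true})
      assert(oldPredicate(Bits, Is64Bit) == newPredicate(Bits, Is64Bit));
  // ...and differ only on wider-than-native types, which the new form rejects.
  assert(oldPredicate(128, false) && !newPredicate(128, false));
  return 0;
}
```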
80 changes: 80 additions & 0 deletions llvm/test/CodeGen/AArch64/and-mask-variable.ll
@@ -0,0 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define i32 @mask_pair(i32 %x, i32 %y) {
; CHECK-SD-LABEL: mask_pair:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr w8, w0, w1
; CHECK-SD-NEXT: lsl w0, w8, w1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mask_pair:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-GI-NEXT: lsl w8, w8, w1
; CHECK-GI-NEXT: and w0, w8, w0
; CHECK-GI-NEXT: ret
%shl = shl nsw i32 -1, %y
%and = and i32 %shl, %x
ret i32 %and
}

define i64 @mask_pair_64(i64 %x, i64 %y) {
; CHECK-SD-LABEL: mask_pair_64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: lsr x8, x0, x1
; CHECK-SD-NEXT: lsl x0, x8, x1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mask_pair_64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: lsl x8, x8, x1
; CHECK-GI-NEXT: and x0, x8, x0
; CHECK-GI-NEXT: ret
%shl = shl nsw i64 -1, %y
%and = and i64 %shl, %x
ret i64 %and
}

define i128 @mask_pair_128(i128 %x, i128 %y) {
; CHECK-SD-LABEL: mask_pair_128:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-SD-NEXT: mvn w9, w2
; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: lsl x8, x8, x2
; CHECK-SD-NEXT: lsr x9, x10, x9
; CHECK-SD-NEXT: tst x2, #0x40
; CHECK-SD-NEXT: orr x9, x8, x9
; CHECK-SD-NEXT: csel x9, x8, x9, ne
; CHECK-SD-NEXT: csel x8, xzr, x8, ne
; CHECK-SD-NEXT: and x0, x8, x0
; CHECK-SD-NEXT: and x1, x9, x1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: mask_pair_128:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #64 // =0x40
; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT: sub x10, x2, #64
; CHECK-GI-NEXT: sub x8, x8, x2
; CHECK-GI-NEXT: lsl x11, x9, x2
; CHECK-GI-NEXT: cmp x2, #64
; CHECK-GI-NEXT: lsr x8, x9, x8
; CHECK-GI-NEXT: lsl x9, x9, x10
; CHECK-GI-NEXT: csel x10, x11, xzr, lo
; CHECK-GI-NEXT: orr x8, x8, x11
; CHECK-GI-NEXT: and x0, x10, x0
; CHECK-GI-NEXT: csel x8, x8, x9, lo
; CHECK-GI-NEXT: cmp x2, #0
; CHECK-GI-NEXT: csinv x8, x8, xzr, ne
; CHECK-GI-NEXT: and x1, x8, x1
; CHECK-GI-NEXT: ret
%shl = shl nsw i128 -1, %y
%and = and i128 %shl, %x
ret i128 %and
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
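The new test covers all three widths. For i32 and i64, SelectionDAG (the CHECK-SD prefixes) now folds the mask into a two-instruction lsr/lsl pair, while GlobalISel (CHECK-GI) still materializes the -1 mask with mov/lsl/and. For i128, the `<= 64` cap in the AArch64 override declines the fold, so both selectors keep the mask form, legalized across a register pair (the csel sequences above). A rough C++ equivalent of the IR patterns under test (illustrative only; the tests operate on LLVM IR directly, and `unsigned __int128` is a Clang/GCC extension):

```cpp
#include <cstdint>

// i32/i64 cases: the hook accepts these, so SelectionDAG lowers the
// mask as a shift pair, (X >> Y) << Y.
uint64_t mask_pair_64(uint64_t X, uint64_t Y) {
  return X & (~0ULL << Y); // clears the low Y bits, Y in [0, 63]
}

// i128 case: wider than a 64-bit GPR, so the hook declines and the -1
// mask is materialized and split across two registers during legalization.
unsigned __int128 mask_pair_128(unsigned __int128 X, unsigned Y) {
  return X & (~(unsigned __int128)0 << Y);
}
```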
98 changes: 43 additions & 55 deletions llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_c0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %shifted = lshr i32 %val, %numskipbits
 %numhighbits = sub i32 32, %numlowbits
@@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_c1_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %skip = zext i8 %numskipbits to i32
 %shifted = lshr i32 %val, %skip
@@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr w8, [x0]
 ; CHECK-NEXT: neg w9, w2
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w9, w10, w9
 ; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %shifted = lshr i32 %val, %numskipbits
@@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr w8, [x0]
 ; CHECK-NEXT: mov w9, #32 // =0x20
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT: sub w9, w9, w2
 ; CHECK-NEXT: lsr w8, w8, w1
-; CHECK-NEXT: lsr w9, w10, w9
-; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %skip = zext i8 %numskipbits to i32
@@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_c4_commutative:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w10, w0, w1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w10, w8
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %shifted = lshr i32 %val, %numskipbits
 %numhighbits = sub i32 32, %numlowbits
@@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
 %numhighbits = sub i64 64, %numlowbits
@@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c1_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x8, x10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %skip = zext i8 %numskipbits to i64
 %shifted = lshr i64 %val, %skip
@@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr x8, [x0]
 ; CHECK-NEXT: neg x9, x2
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x9, x10, x9
 ; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: and x0, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %shifted = lshr i64 %val, %numskipbits
@@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr64_c3_load_indexzext:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT: mov w9, #64 // =0x40
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT: lsr x8, x8, x1
-; CHECK-NEXT: lsr x9, x10, x9
-; CHECK-NEXT: and x0, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %skip = zext i8 %numskipbits to i64
@@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c4_commutative:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: and x0, x10, x8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
 %numhighbits = sub i64 64, %numlowbits
@@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_c1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
 %truncshifted = trunc i64 %shifted to i32
@@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_c2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr x10, x0, x1
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
 %numhighbits = sub i32 32, %numlowbits
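Every bextr-style diff above follows the same pattern, so a scalar model of `bextr32_c0` makes the before/after explicit (illustrative only; shift amounts are assumed in range so the C++ shifts are well defined, i.e. 1 <= NumLowBits <= 32 and NumSkipBits < 32):

```cpp
#include <cstdint>

// Extract NumLowBits bits of Val, starting at bit NumSkipBits.
uint32_t bextr32(uint32_t Val, uint32_t NumSkipBits, uint32_t NumLowBits) {
  uint32_t Shifted = Val >> NumSkipBits;  // lsr w8, w0, w1
  uint32_t NumHighBits = 32 - NumLowBits; // neg w9, w2 (AArch64 register
                                          // shifts use the count modulo 32)
  // Before: Shifted & (UINT32_MAX >> NumHighBits)   : mov #-1, lsr, and
  // After:  (Shifted << NumHighBits) >> NumHighBits : lsl, lsr
  return (Shifted << NumHighBits) >> NumHighBits;
}
```

The net effect shows in the file stats above, 43 additions against 55 deletions: one instruction and one scratch register saved in most of these sequences.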