Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30702,6 +30702,19 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}

/// AArch64 override of the TargetLowering hook that reports whether a
/// target-specific node can create undef/poison.
///
/// Opcodes listed in the switch are answered here; every other opcode is
/// forwarded, with all arguments unchanged, to the generic TargetLowering
/// implementation.
bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  // TODO: Add more target nodes.
  switch (Op.getOpcode()) {
  case AArch64ISD::VASHR:
    // VASHR is reported as never creating undef/poison.
    return false;
  default:
    // Not handled here; defer to the base class below.
    break;
  }

  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}

bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
return Op.getOpcode() == AArch64ISD::DUP ||
Op.getOpcode() == AArch64ISD::MOVI ||
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,12 @@ class AArch64TargetLowering : public TargetLowering {
TargetLoweringOpt &TLO,
unsigned Depth) const override;

/// Override of the TargetLowering hook queried for target-specific nodes
/// to decide whether \p Op can create undef/poison. The definition lives in
/// AArch64ISelLowering.cpp.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
const APInt &DemandedElts,
const SelectionDAG &DAG,
bool PoisonOnly, bool ConsiderFlags,
unsigned Depth) const override;

bool isTargetCanonicalConstantNode(SDValue Op) const override;

// With the exception of data-predicate transitions, no instructions are
Expand Down
25 changes: 12 additions & 13 deletions llvm/test/CodeGen/AArch64/vector-compress.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,15 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: mov.s w9, v1[1]
; CHECK-NEXT: mov.s w10, v1[2]
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: mov.s w10, v1[2]
; CHECK-NEXT: and x12, x11, #0x1
; CHECK-NEXT: bfi x8, x11, #2, #1
; CHECK-NEXT: and x11, x11, #0x1
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: and w10, w10, #0x1
; CHECK-NEXT: add x9, x11, x9
; CHECK-NEXT: mov x11, sp
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: add x9, x12, x9
; CHECK-NEXT: st1.s { v0 }[1], [x8]
; CHECK-NEXT: add w10, w9, w10
; CHECK-NEXT: sub w10, w9, w10
; CHECK-NEXT: orr x9, x11, x9, lsl #2
; CHECK-NEXT: bfi x11, x10, #2, #2
; CHECK-NEXT: st1.s { v0 }[2], [x9]
Expand Down Expand Up @@ -93,7 +92,8 @@ define <2 x double> @test_compress_v2f64(<2 x double> %vec, <2 x i1> %mask) {
; CHECK-NEXT: shl.2d v1, v1, #63
; CHECK-NEXT: cmlt.2d v1, v1, #0
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: bfi x8, x9, #3, #1
; CHECK-NEXT: and x9, x9, #0x8
; CHECK-NEXT: orr x8, x8, x9
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't entirely worked out what's going on here - but we've gone from:

    t24: v2i64 = AArch64ISD::VASHR t23, Constant:i32<63>
    t55: v2i64 = freeze t24
    t32: i64 = extract_vector_elt t55, Constant:i64<0>
    t53: i64 = shl t32, Constant:i64<3>
    t54: i64 = and t53, Constant:i64<8>
    t49: i64 = or disjoint FrameIndex:i64<0>, t54

to (what should be better):

    t56: v2i64 = freeze t23
    t24: v2i64 = AArch64ISD::VASHR t56, Constant:i32<63>
    t32: i64 = extract_vector_elt t24, Constant:i64<0>
    t54: i64 = and t32, Constant:i64<8>
    t49: i64 = or FrameIndex:i64<0>, t54

I suppose we now know the extracted element is all sign bits - but that screws up the BFI isel somehow?

; CHECK-NEXT: st1.d { v0 }[1], [x8]
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
Expand Down Expand Up @@ -420,16 +420,15 @@ define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) {
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: and x12, x10, #0x1
; CHECK-NEXT: bfi x11, x10, #2, #1
; CHECK-NEXT: and x10, x10, #0x1
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: add x8, x12, x8
; CHECK-NEXT: st1.s { v0 }[1], [x11]
; CHECK-NEXT: add w9, w8, w9
; CHECK-NEXT: sub w9, w8, w9
; CHECK-NEXT: orr x8, x10, x8, lsl #2
; CHECK-NEXT: bfi x10, x9, #2, #2
; CHECK-NEXT: st1.s { v0 }[2], [x8]
Expand Down
3 changes: 3 additions & 0 deletions llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) {
auto VecA = DAG->getConstant(0xaa, Loc, VecVT);
auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift);
EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u);
// VASHR can't create undef/poison - FREEZE(VASHR(C1,C2)) -> VASHR(C1,C2).
auto Fr2 = DAG->getFreeze(Op2);
EXPECT_EQ(DAG->ComputeNumSignBits(Fr2), 5u);
}

TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) {
Expand Down
Loading