Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31004,6 +31004,9 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
case AArch64ISD::MOVIshift:
case AArch64ISD::MVNImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
case AArch64ISD::VSHL:
return false;
}
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
Expand Down
25 changes: 12 additions & 13 deletions llvm/test/CodeGen/AArch64/vector-compress.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,15 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: mov.s w9, v1[1]
; CHECK-NEXT: mov.s w10, v1[2]
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: mov.s w10, v1[2]
; CHECK-NEXT: and x12, x11, #0x1
; CHECK-NEXT: bfi x8, x11, #2, #1
; CHECK-NEXT: and x11, x11, #0x1
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: and w10, w10, #0x1
; CHECK-NEXT: add x9, x11, x9
; CHECK-NEXT: mov x11, sp
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: add x9, x12, x9
; CHECK-NEXT: st1.s { v0 }[1], [x8]
; CHECK-NEXT: add w10, w9, w10
; CHECK-NEXT: sub w10, w9, w10
; CHECK-NEXT: orr x9, x11, x9, lsl #2
; CHECK-NEXT: bfi x11, x10, #2, #2
; CHECK-NEXT: st1.s { v0 }[2], [x9]
Expand Down Expand Up @@ -93,7 +92,8 @@ define <2 x double> @test_compress_v2f64(<2 x double> %vec, <2 x i1> %mask) {
; CHECK-NEXT: shl.2d v1, v1, #63
; CHECK-NEXT: cmlt.2d v1, v1, #0
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: bfi x8, x9, #3, #1
; CHECK-NEXT: and x9, x9, #0x8
; CHECK-NEXT: orr x8, x8, x9
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't entirely worked out what's going on here - but we've gone from:

    t24: v2i64 = AArch64ISD::VASHR t23, Constant:i32<63>
    t55: v2i64 = freeze t24
    t32: i64 = extract_vector_elt t55, Constant:i64<0>
    t53: i64 = shl t32, Constant:i64<3>
    t54: i64 = and t53, Constant:i64<8>
    t49: i64 = or disjoint FrameIndex:i64<0>, t54

to (what should be better):

    t56: v2i64 = freeze t23
    t24: v2i64 = AArch64ISD::VASHR t56, Constant:i32<63>
    t32: i64 = extract_vector_elt t24, Constant:i64<0>
    t54: i64 = and t32, Constant:i64<8>
    t49: i64 = or FrameIndex:i64<0>, t54

I suppose we now know the extracted element is all sign bits (which is presumably why the `shl` by 3 has been folded away) - but that somehow stops the BFI isel pattern from matching?

; CHECK-NEXT: st1.d { v0 }[1], [x8]
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
Expand Down Expand Up @@ -420,16 +420,15 @@ define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) {
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: and x12, x10, #0x1
; CHECK-NEXT: bfi x11, x10, #2, #1
; CHECK-NEXT: and x10, x10, #0x1
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: add x8, x12, x8
; CHECK-NEXT: st1.s { v0 }[1], [x11]
; CHECK-NEXT: add w9, w8, w9
; CHECK-NEXT: sub w9, w8, w9
; CHECK-NEXT: orr x8, x10, x8, lsl #2
; CHECK-NEXT: bfi x10, x9, #2, #2
; CHECK-NEXT: st1.s { v0 }[2], [x8]
Expand Down
18 changes: 18 additions & 0 deletions llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) {
auto VecA = DAG->getConstant(0xaa, Loc, VecVT);
auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift);
EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u);
// VASHR can't create undef/poison - FREEZE(VASHR(C1,C2)) -> VASHR(C1,C2).
auto Fr2 = DAG->getFreeze(Op2);
EXPECT_EQ(DAG->ComputeNumSignBits(Fr2), 5u);
}

TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) {
Expand Down Expand Up @@ -564,6 +567,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VASHR) {
Known = DAG->computeKnownBits(Op1);
EXPECT_EQ(Known.Zero, APInt(8, 0x00));
EXPECT_EQ(Known.One, APInt(8, 0xFF));

auto Fr1 = DAG->getFreeze(Op1);
Known = DAG->computeKnownBits(Fr1);
EXPECT_EQ(Known.Zero, APInt(8, 0x00));
EXPECT_EQ(Known.One, APInt(8, 0xFF));
}

// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
Expand All @@ -584,6 +592,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VLSHR) {
Known = DAG->computeKnownBits(Op1);
EXPECT_EQ(Known.Zero, APInt(8, 0xFE));
EXPECT_EQ(Known.One, APInt(8, 0x1));

auto Fr1 = DAG->getFreeze(Op1);
Known = DAG->computeKnownBits(Fr1);
EXPECT_EQ(Known.Zero, APInt(8, 0xFE));
EXPECT_EQ(Known.One, APInt(8, 0x1));
}

// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
Expand All @@ -604,6 +617,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VSHL) {
Known = DAG->computeKnownBits(Op1);
EXPECT_EQ(Known.Zero, APInt(8, 0x7F));
EXPECT_EQ(Known.One, APInt(8, 0x80));

auto Fr1 = DAG->getFreeze(Op1);
Known = DAG->computeKnownBits(Fr1);
EXPECT_EQ(Known.Zero, APInt(8, 0x7F));
EXPECT_EQ(Known.One, APInt(8, 0x80));
}

TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_BUILD_VECTOR) {
Expand Down