-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64] Improve code generation of bool vector reduce operations #115713
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15841,11 +15841,27 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, | |
| return getVectorBitwiseReduce(Opcode, HalfVec, VT, DL, DAG); | ||
| } | ||
|
|
||
| // Vectors that are less than 64 bits get widened to neatly fit a 64 bit | ||
| // register, so e.g. <4 x i1> gets lowered to <4 x i16>. Sign extending to | ||
| // Results of setcc operations get widened to 128 bits for xor reduce if | ||
| // their input operands are 128 bits wide, otherwise vectors that are less | ||
| // than 64 bits get widened to neatly fit a 64 bit register, so e.g. | ||
| // <4 x i1> gets lowered to either <4 x i16> or <4 x i32>. Sign extending to | ||
| // this element size leads to the best codegen, since e.g. setcc results | ||
| // might need to be truncated otherwise. | ||
| EVT ExtendedVT = MVT::getIntegerVT(std::max(64u / NumElems, 8u)); | ||
| unsigned ExtendedWidth = 64; | ||
| if (ScalarOpcode == ISD::XOR && Vec.getOpcode() == ISD::SETCC && | ||
| Vec.getOperand(0).getValueSizeInBits() >= 128) { | ||
| ExtendedWidth = 128; | ||
| } | ||
| EVT ExtendedVT = MVT::getIntegerVT(std::max(ExtendedWidth / NumElems, 8u)); | ||
|
|
||
| // Negate the reduced vector value for reduce and operations that use | ||
| // fcmp. | ||
| if (ScalarOpcode == ISD::AND && NumElems < 16) { | ||
| Vec = DAG.getNode( | ||
| ISD::XOR, DL, VecVT, Vec, | ||
| DAG.getSplatVector( | ||
| VecVT, DL, DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32))); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI there is a |
||
| } | ||
|
|
||
| // any_ext doesn't work with umin/umax, so only use it for uadd. | ||
| unsigned ExtendOp = | ||
|
|
@@ -15854,10 +15870,36 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, | |
| ExtendOp, DL, VecVT.changeVectorElementType(ExtendedVT), Vec); | ||
| switch (ScalarOpcode) { | ||
| case ISD::AND: | ||
| Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended); | ||
| if (NumElems < 16) { | ||
| // Check if all lanes of the negated bool vector value are zero by | ||
| // comparing against 0.0 with ordered and equal predicate. The only | ||
| // non-zero bit pattern that compares ordered and equal to 0.0 is -0.0, | ||
| // where only the sign bit is set. However the bool vector is | ||
| // sign-extended so that each bit in a lane is either zero or one, | ||
| // meaning that it is impossible to get the bit pattern of -0.0. | ||
| assert(Extended.getValueSizeInBits() == 64); | ||
| Extended = DAG.getBitcast(MVT::f64, Extended); | ||
| Result = | ||
| DAG.getSetCC(DL, MVT::i32, Extended, | ||
| DAG.getConstantFP(0.0, DL, MVT::f64), ISD::SETOEQ); | ||
| } else { | ||
| Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended); | ||
| } | ||
| break; | ||
| case ISD::OR: | ||
| Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended); | ||
| if (NumElems < 16) { | ||
| // Check if any lane of the bool vector is set by comparing against 0.0. | ||
| // NaN bit patterns are handled by using the 'unordered or not equal' | ||
| // predicate. Similarly to the reduce and case, -0.0 doesn't have to be | ||
| // handled here (see explanation above). | ||
| assert(Extended.getValueSizeInBits() == 64); | ||
| Extended = DAG.getBitcast(MVT::f64, Extended); | ||
| Result = | ||
| DAG.getSetCC(DL, MVT::i32, Extended, | ||
| DAG.getConstantFP(0.0, DL, MVT::f64), ISD::SETUNE); | ||
| } else { | ||
| Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended); | ||
| } | ||
| break; | ||
| case ISD::XOR: | ||
| Result = DAG.getNode(ISD::VECREDUCE_ADD, DL, ExtendedVT, Extended); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,11 +20,11 @@ define i1 @test_redand_v1i1(<1 x i1> %a) { | |
| define i1 @test_redand_v2i1(<2 x i1> %a) { | ||
| ; CHECK-LABEL: test_redand_v2i1: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mvn v0.8b, v0.8b | ||
| ; CHECK-NEXT: shl v0.2s, v0.2s, #31 | ||
| ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 | ||
| ; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s | ||
| ; CHECK-NEXT: fmov w8, s0 | ||
| ; CHECK-NEXT: and w0, w8, #0x1 | ||
| ; CHECK-NEXT: fcmp d0, #0.0 | ||
|
||
| ; CHECK-NEXT: cset w0, eq | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; GISEL-LABEL: test_redand_v2i1: | ||
|
|
@@ -42,11 +42,11 @@ define i1 @test_redand_v2i1(<2 x i1> %a) { | |
| define i1 @test_redand_v4i1(<4 x i1> %a) { | ||
| ; CHECK-LABEL: test_redand_v4i1: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mvn v0.8b, v0.8b | ||
| ; CHECK-NEXT: shl v0.4h, v0.4h, #15 | ||
| ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 | ||
| ; CHECK-NEXT: uminv h0, v0.4h | ||
| ; CHECK-NEXT: fmov w8, s0 | ||
| ; CHECK-NEXT: and w0, w8, #0x1 | ||
| ; CHECK-NEXT: fcmp d0, #0.0 | ||
| ; CHECK-NEXT: cset w0, eq | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; GISEL-LABEL: test_redand_v4i1: | ||
|
|
@@ -68,11 +68,11 @@ define i1 @test_redand_v4i1(<4 x i1> %a) { | |
| define i1 @test_redand_v8i1(<8 x i1> %a) { | ||
| ; CHECK-LABEL: test_redand_v8i1: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mvn v0.8b, v0.8b | ||
| ; CHECK-NEXT: shl v0.8b, v0.8b, #7 | ||
| ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 | ||
| ; CHECK-NEXT: uminv b0, v0.8b | ||
| ; CHECK-NEXT: fmov w8, s0 | ||
| ; CHECK-NEXT: and w0, w8, #0x1 | ||
| ; CHECK-NEXT: fcmp d0, #0.0 | ||
| ; CHECK-NEXT: cset w0, eq | ||
| ; CHECK-NEXT: ret | ||
| ; | ||
| ; GISEL-LABEL: test_redand_v8i1: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.