-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[DAG] Improve simplifyDivRem's handling of zero numerators #163531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Post legalization, simplifyDivRem will not work as well when folding zero numerators. This is due to how we disallow truncation when calling isConstOrConstSplat, similar to PR162706. Fix this by refactoring the code to use `isZeroOrZeroSplat`, which allows truncation by default.
|
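@llvm/pr-subscribers-llvm-selectiondag Author: Hari Limaye (hazzlim) Changes: Post legalization, simplifyDivRem will not work as well when folding zero numerators. This is due to how we disallow truncation when calling isConstOrConstSplat, similar to PR162706. Fix this by refactoring the code to use `isZeroOrZeroSplat`, which allows truncation by default. Full diff: https://github.com/llvm/llvm-project/pull/163531.diff 2 Files Affected: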
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 358e060d2c6d3..30421f7173211 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5060,8 +5060,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isZero())
+ if (isZeroOrZeroSplat(N0))
return N0;
// X / X -> 1
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..442fc1bb15e7f 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1859,3 +1859,43 @@ define <16 x i16> @combine_vec_sdiv_by_one_obfuscated(<16 x i16> %x) "target-fea
%div = sdiv <16 x i16> %x, %zero_and_ones
ret <16 x i16> %div
}
+
+define <8 x i32> @combine_vec_sdiv_zero_obfuscated(<8 x i32> %x) {
+; CHECK-SD-LABEL: combine_vec_sdiv_zero_obfuscated:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov w10, s0
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov w9, v0.s[1]
+; CHECK-SD-NEXT: mov w11, v0.s[2]
+; CHECK-SD-NEXT: mov w12, v0.s[3]
+; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT: sdiv w10, w8, w10
+; CHECK-SD-NEXT: sdiv w9, w8, w9
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: sdiv w11, w8, w11
+; CHECK-SD-NEXT: mov v0.s[1], w9
+; CHECK-SD-NEXT: sdiv w8, w8, w12
+; CHECK-SD-NEXT: mov v0.s[2], w11
+; CHECK-SD-NEXT: mov v0.s[3], w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: combine_vec_sdiv_zero_obfuscated:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w10, v0.s[1]
+; CHECK-GI-NEXT: mov w11, v0.s[2]
+; CHECK-GI-NEXT: mov w12, v0.s[3]
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: sdiv w9, w8, w9
+; CHECK-GI-NEXT: sdiv w10, w8, w10
+; CHECK-GI-NEXT: fmov s0, w9
+; CHECK-GI-NEXT: sdiv w11, w8, w11
+; CHECK-GI-NEXT: mov v0.s[1], w10
+; CHECK-GI-NEXT: sdiv w8, w8, w12
+; CHECK-GI-NEXT: mov v0.s[2], w11
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: ret
+ %1 = sdiv <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0>, %x
+ ret <8 x i32> %1
+}
|
|
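@llvm/pr-subscribers-backend-aarch64 Author: Hari Limaye (hazzlim) Changes: Post legalization, simplifyDivRem will not work as well when folding zero numerators. This is due to how we disallow truncation when calling isConstOrConstSplat, similar to PR162706. Fix this by refactoring the code to use `isZeroOrZeroSplat`, which allows truncation by default. Full diff: https://github.com/llvm/llvm-project/pull/163531.diff 2 Files Affected: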
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 358e060d2c6d3..30421f7173211 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5060,8 +5060,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isZero())
+ if (isZeroOrZeroSplat(N0))
return N0;
// X / X -> 1
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..442fc1bb15e7f 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1859,3 +1859,43 @@ define <16 x i16> @combine_vec_sdiv_by_one_obfuscated(<16 x i16> %x) "target-fea
%div = sdiv <16 x i16> %x, %zero_and_ones
ret <16 x i16> %div
}
+
+define <8 x i32> @combine_vec_sdiv_zero_obfuscated(<8 x i32> %x) {
+; CHECK-SD-LABEL: combine_vec_sdiv_zero_obfuscated:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov w10, s0
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: mov w9, v0.s[1]
+; CHECK-SD-NEXT: mov w11, v0.s[2]
+; CHECK-SD-NEXT: mov w12, v0.s[3]
+; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT: sdiv w10, w8, w10
+; CHECK-SD-NEXT: sdiv w9, w8, w9
+; CHECK-SD-NEXT: fmov s0, w10
+; CHECK-SD-NEXT: sdiv w11, w8, w11
+; CHECK-SD-NEXT: mov v0.s[1], w9
+; CHECK-SD-NEXT: sdiv w8, w8, w12
+; CHECK-SD-NEXT: mov v0.s[2], w11
+; CHECK-SD-NEXT: mov v0.s[3], w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: combine_vec_sdiv_zero_obfuscated:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w10, v0.s[1]
+; CHECK-GI-NEXT: mov w11, v0.s[2]
+; CHECK-GI-NEXT: mov w12, v0.s[3]
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: sdiv w9, w8, w9
+; CHECK-GI-NEXT: sdiv w10, w8, w10
+; CHECK-GI-NEXT: fmov s0, w9
+; CHECK-GI-NEXT: sdiv w11, w8, w11
+; CHECK-GI-NEXT: mov v0.s[1], w10
+; CHECK-GI-NEXT: sdiv w8, w8, w12
+; CHECK-GI-NEXT: mov v0.s[2], w11
+; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: ret
+ %1 = sdiv <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0>, %x
+ ret <8 x i32> %1
+}
|
|
When I look at just the second commit I don't see any changes to the output for the new tests? |
Post legalization, simplifyDivRem will not work as well when folding
zero numerators. This is due to how we disallow truncation when calling
isConstOrConstSplat, similar to PR162706.
Fix this by refactoring the code to use
isZeroOrZeroSplat, which allows truncation by default.