Skip to content

Commit 0d335f7

Browse files
committed
[InstCombine] Handle more commuted cases in matchesSquareSum()
1 parent a39a382 commit 0d335f7

File tree

2 files changed

+18
-31
lines changed

2 files changed

+18
-31
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1014,7 +1014,7 @@ static bool matchesSquareSum(BinaryOperator &I, Mul2Rhs M2Rhs, Value *&A,
10141014
// (a * a) + (((a * 2) + b) * b)
10151015
if (match(&I, m_c_BinOp(
10161016
AddOp, m_OneUse(m_BinOp(MulOp, m_Value(A), m_Deferred(A))),
1017-
m_OneUse(m_BinOp(
1017+
m_OneUse(m_c_BinOp(
10181018
MulOp,
10191019
m_c_BinOp(AddOp, m_BinOp(Mul2Op, m_Deferred(A), M2Rhs),
10201020
m_Value(B)),
@@ -1025,16 +1025,16 @@ static bool matchesSquareSum(BinaryOperator &I, Mul2Rhs M2Rhs, Value *&A,
10251025
// +
10261026
// (a * a + b * b) or (b * b + a * a)
10271027
return match(
1028-
&I,
1029-
m_c_BinOp(AddOp,
1030-
m_CombineOr(
1031-
m_OneUse(m_BinOp(
1032-
Mul2Op, m_BinOp(MulOp, m_Value(A), m_Value(B)), M2Rhs)),
1033-
m_OneUse(m_BinOp(MulOp, m_BinOp(Mul2Op, m_Value(A), M2Rhs),
1028+
&I, m_c_BinOp(
1029+
AddOp,
1030+
m_CombineOr(
1031+
m_OneUse(m_BinOp(
1032+
Mul2Op, m_BinOp(MulOp, m_Value(A), m_Value(B)), M2Rhs)),
1033+
m_OneUse(m_c_BinOp(MulOp, m_BinOp(Mul2Op, m_Value(A), M2Rhs),
10341034
m_Value(B)))),
1035-
m_OneUse(m_c_BinOp(
1036-
AddOp, m_BinOp(MulOp, m_Deferred(A), m_Deferred(A)),
1037-
m_BinOp(MulOp, m_Deferred(B), m_Deferred(B))))));
1035+
m_OneUse(
1036+
m_c_BinOp(AddOp, m_BinOp(MulOp, m_Deferred(A), m_Deferred(A)),
1037+
m_BinOp(MulOp, m_Deferred(B), m_Deferred(B))))));
10381038
}
10391039

10401040
// Fold integer variations of a^2 + 2*a*b + b^2 -> (a + b)^2

llvm/test/Transforms/InstCombine/add.ll

Lines changed: 8 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -3287,11 +3287,8 @@ define i32 @add_reduce_sqr_sum_flipped(i32 %a, i32 %b) {
32873287
define i32 @add_reduce_sqr_sum_flipped2(i32 %a, i32 %bx) {
32883288
; CHECK-LABEL: @add_reduce_sqr_sum_flipped2(
32893289
; CHECK-NEXT: [[B:%.*]] = xor i32 [[BX:%.*]], 42
3290-
; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
3291-
; CHECK-NEXT: [[TWO_A:%.*]] = shl i32 [[A]], 1
3292-
; CHECK-NEXT: [[TWO_A_PLUS_B:%.*]] = add i32 [[TWO_A]], [[B]]
3293-
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[B]], [[TWO_A_PLUS_B]]
3294-
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[A_SQ]]
3290+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B]], [[A:%.*]]
3291+
; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[TMP1]], [[TMP1]]
32953292
; CHECK-NEXT: ret i32 [[ADD]]
32963293
;
32973294
%b = xor i32 %bx, 42 ; thwart complexity-based canonicalization
@@ -3350,11 +3347,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped(i32 %a, i32 %b) {
33503347
define i32 @add_reduce_sqr_sum_order2_flipped2(i32 %a, i32 %bx) {
33513348
; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped2(
33523349
; CHECK-NEXT: [[B:%.*]] = xor i32 [[BX:%.*]], 42
3353-
; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
3354-
; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1
3355-
; CHECK-NEXT: [[TWOAB1:%.*]] = add i32 [[B]], [[TWOA]]
3356-
; CHECK-NEXT: [[TWOAB_B2:%.*]] = mul i32 [[B]], [[TWOAB1]]
3357-
; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]]
3350+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B]], [[A:%.*]]
3351+
; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
33583352
; CHECK-NEXT: ret i32 [[AB2]]
33593353
;
33603354
%b = xor i32 %bx, 42 ; thwart complexity-based canonicalization
@@ -3370,11 +3364,8 @@ define i32 @add_reduce_sqr_sum_order2_flipped2(i32 %a, i32 %bx) {
33703364
define i32 @add_reduce_sqr_sum_order2_flipped3(i32 %a, i32 %bx) {
33713365
; CHECK-LABEL: @add_reduce_sqr_sum_order2_flipped3(
33723366
; CHECK-NEXT: [[B:%.*]] = xor i32 [[BX:%.*]], 42
3373-
; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A:%.*]], [[A]]
3374-
; CHECK-NEXT: [[TWOA:%.*]] = shl i32 [[A]], 1
3375-
; CHECK-NEXT: [[B_SQ1:%.*]] = add i32 [[TWOA]], [[B]]
3376-
; CHECK-NEXT: [[TWOAB_B2:%.*]] = mul i32 [[B]], [[B_SQ1]]
3377-
; CHECK-NEXT: [[AB2:%.*]] = add i32 [[A_SQ]], [[TWOAB_B2]]
3367+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B]], [[A:%.*]]
3368+
; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
33783369
; CHECK-NEXT: ret i32 [[AB2]]
33793370
;
33803371
%b = xor i32 %bx, 42 ; thwart complexity-based canonicalization
@@ -3570,12 +3561,8 @@ define i32 @add_reduce_sqr_sum_order5_flipped2(i32 %a, i32 %b) {
35703561
define i32 @add_reduce_sqr_sum_order5_flipped3(i32 %ax, i32 %b) {
35713562
; CHECK-LABEL: @add_reduce_sqr_sum_order5_flipped3(
35723563
; CHECK-NEXT: [[A:%.*]] = xor i32 [[AX:%.*]], 42
3573-
; CHECK-NEXT: [[A_SQ:%.*]] = mul nsw i32 [[A]], [[A]]
3574-
; CHECK-NEXT: [[TWOB:%.*]] = shl i32 [[B:%.*]], 1
3575-
; CHECK-NEXT: [[TWOAB:%.*]] = mul i32 [[A]], [[TWOB]]
3576-
; CHECK-NEXT: [[B_SQ:%.*]] = mul i32 [[B]], [[B]]
3577-
; CHECK-NEXT: [[A2_B2:%.*]] = add i32 [[A_SQ]], [[B_SQ]]
3578-
; CHECK-NEXT: [[AB2:%.*]] = add i32 [[TWOAB]], [[A2_B2]]
3564+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A]], [[B:%.*]]
3565+
; CHECK-NEXT: [[AB2:%.*]] = mul i32 [[TMP1]], [[TMP1]]
35793566
; CHECK-NEXT: ret i32 [[AB2]]
35803567
;
35813568
%a = xor i32 %ax, 42 ; thwart complexity-based canonicalization

0 commit comments

Comments
 (0)