Skip to content

Commit 1261c02

Browse files
authored
[LICM] Drop nsw/nuw flags on affected instructions in hoistMulAddAssociation. (llvm#85486)
Since we are introducing new multiplies earlier in the arithmetic, the nsw/nuw flags on later instructions are no longer accurate. Fixes llvm#85457.
1 parent: 457f762 · commit: 1261c02

File tree

2 files changed: 57 additions and 16 deletions (+57 -16 lines changed)

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2701,6 +2701,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
27012701

27022702
// First, we need to make sure we should do the transformation.
27032703
SmallVector<Use *> Changes;
2704+
SmallVector<BinaryOperator *> Adds;
27042705
SmallVector<BinaryOperator *> Worklist;
27052706
if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
27062707
Worklist.push_back(VariantBinOp);
@@ -2713,6 +2714,7 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
27132714
isa<BinaryOperator>(BO->getOperand(1))) {
27142715
Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
27152716
Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
2717+
Adds.push_back(BO);
27162718
continue;
27172719
}
27182720
if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
@@ -2735,6 +2737,12 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
27352737
if (Changes.empty())
27362738
return false;
27372739

2740+
// Drop the poison flags for any adds we looked through.
2741+
if (I.getType()->isIntOrIntVectorTy()) {
2742+
for (auto *Add : Adds)
2743+
Add->dropPoisonGeneratingFlags();
2744+
}
2745+
27382746
// We know we should do it so let's do the transformation.
27392747
auto *Preheader = L.getLoopPreheader();
27402748
assert(Preheader && "Loop is not in simplify form?");
@@ -2743,9 +2751,11 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
27432751
assert(L.isLoopInvariant(U->get()));
27442752
Instruction *Ins = cast<Instruction>(U->getUser());
27452753
Value *Mul;
2746-
if (I.getType()->isIntOrIntVectorTy())
2754+
if (I.getType()->isIntOrIntVectorTy()) {
27472755
Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
2748-
else
2756+
// Drop the poison flags on the original multiply.
2757+
Ins->dropPoisonGeneratingFlags();
2758+
} else
27492759
Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");
27502760
U->set(Mul);
27512761
}

llvm/test/Transforms/LICM/expr-reassociate-int.ll

Lines changed: 45 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ define void @innermost_loop_1d_shouldhoist(i32 %i, i64 %d1, i64 %delta, ptr %cel
2323
; CHECK-LABEL: define void @innermost_loop_1d_shouldhoist
2424
; CHECK-SAME: (i32 [[I:%.*]], i64 [[D1:%.*]], i64 [[DELTA:%.*]], ptr [[CELLS:%.*]]) {
2525
; CHECK-NEXT: entry:
26-
; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[DELTA]], [[D1]]
26+
; CHECK-NEXT: [[MUL_1:%.*]] = mul nuw nsw i64 [[DELTA]], [[D1]]
2727
; CHECK-NEXT: br label [[FOR_COND:%.*]]
2828
; CHECK: for.cond:
2929
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_J_1:%.*]], [[FOR_BODY:%.*]] ]
@@ -55,7 +55,7 @@ for.body:
5555
%idxprom.j.1 = zext i32 %add.j.1 to i64
5656
%arrayidx.j.1 = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j.1
5757
%cell.1 = load i64, ptr %arrayidx.j.1, align 8
58-
%mul.1 = mul i64 %delta, %d1
58+
%mul.1 = mul nsw nuw i64 %delta, %d1
5959
%mul.2 = mul i64 %mul.1, %cell.1
6060
%idxprom.j = zext i32 %j to i64
6161
%arrayidx.j = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j
@@ -130,8 +130,8 @@ define void @innermost_loop_2d(i32 %i, i64 %d1, i64 %d2, i64 %delta, ptr %cells)
130130
; CONSTRAINED-NEXT: [[IDXPROM_J:%.*]] = zext i32 [[J]] to i64
131131
; CONSTRAINED-NEXT: [[ARRAYIDX_J:%.*]] = getelementptr inbounds i64, ptr [[CELLS]], i64 [[IDXPROM_J]]
132132
; CONSTRAINED-NEXT: [[CELL_2:%.*]] = load i64, ptr [[ARRAYIDX_J]], align 8
133-
; CONSTRAINED-NEXT: [[MUL_2:%.*]] = mul i64 [[CELL_2]], [[D2]]
134-
; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add i64 [[MUL_2]], [[MUL_1]]
133+
; CONSTRAINED-NEXT: [[MUL_2:%.*]] = mul nuw nsw i64 [[CELL_2]], [[D2]]
134+
; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add nuw nsw i64 [[MUL_2]], [[MUL_1]]
135135
; CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = mul i64 [[REASS_ADD]], [[DELTA]]
136136
; CONSTRAINED-NEXT: store i64 [[REASS_MUL]], ptr [[ARRAYIDX_J]], align 8
137137
; CONSTRAINED-NEXT: br label [[FOR_COND]]
@@ -155,8 +155,8 @@ for.body:
155155
%idxprom.j = zext i32 %j to i64
156156
%arrayidx.j = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j
157157
%cell.2 = load i64, ptr %arrayidx.j, align 8
158-
%mul.2 = mul i64 %cell.2, %d2
159-
%reass.add = add i64 %mul.2, %mul.1
158+
%mul.2 = mul nsw nuw i64 %cell.2, %d2
159+
%reass.add = add nsw nuw i64 %mul.2, %mul.1
160160
%reass.mul = mul i64 %reass.add, %delta
161161
store i64 %reass.mul, ptr %arrayidx.j, align 8
162162
br label %for.cond
@@ -243,10 +243,10 @@ define void @innermost_loop_3d(i32 %i, i64 %d1, i64 %d2, i64 %d3, i64 %delta, pt
243243
; CONSTRAINED-NEXT: [[IDXPROM_J_2:%.*]] = zext i32 [[ADD_J_2]] to i64
244244
; CONSTRAINED-NEXT: [[ARRAYIDX_J_2:%.*]] = getelementptr inbounds i64, ptr [[CELLS]], i64 [[IDXPROM_J_2]]
245245
; CONSTRAINED-NEXT: [[CELL_3:%.*]] = load i64, ptr [[ARRAYIDX_J_2]], align 8
246-
; CONSTRAINED-NEXT: [[MUL_3:%.*]] = mul i64 [[CELL_3]], [[D3]]
247-
; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add i64 [[MUL_2]], [[MUL_1]]
248-
; CONSTRAINED-NEXT: [[REASS_ADD1:%.*]] = add i64 [[REASS_ADD]], [[MUL_3]]
249-
; CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = mul i64 [[REASS_ADD1]], [[DELTA]]
246+
; CONSTRAINED-NEXT: [[MUL_3:%.*]] = mul nuw nsw i64 [[CELL_3]], [[D3]]
247+
; CONSTRAINED-NEXT: [[REASS_ADD:%.*]] = add nuw nsw i64 [[MUL_2]], [[MUL_1]]
248+
; CONSTRAINED-NEXT: [[REASS_ADD1:%.*]] = add nuw nsw i64 [[REASS_ADD]], [[MUL_3]]
249+
; CONSTRAINED-NEXT: [[REASS_MUL:%.*]] = mul nuw nsw i64 [[REASS_ADD1]], [[DELTA]]
250250
; CONSTRAINED-NEXT: store i64 [[REASS_MUL]], ptr [[ARRAYIDX_J_2]], align 8
251251
; CONSTRAINED-NEXT: br label [[FOR_COND]]
252252
; CONSTRAINED: for.end:
@@ -274,10 +274,10 @@ for.body:
274274
%idxprom.j.2 = zext i32 %add.j.2 to i64
275275
%arrayidx.j.2 = getelementptr inbounds i64, ptr %cells, i64 %idxprom.j.2
276276
%cell.3 = load i64, ptr %arrayidx.j.2, align 8
277-
%mul.3 = mul i64 %cell.3, %d3
278-
%reass.add = add i64 %mul.2, %mul.1
279-
%reass.add1 = add i64 %reass.add, %mul.3
280-
%reass.mul = mul i64 %reass.add1, %delta
277+
%mul.3 = mul nsw nuw i64 %cell.3, %d3
278+
%reass.add = add nsw nuw i64 %mul.2, %mul.1
279+
%reass.add1 = add nsw nuw i64 %reass.add, %mul.3
280+
%reass.mul = mul nsw nuw i64 %reass.add1, %delta
281281
store i64 %reass.mul, ptr %arrayidx.j.2, align 8
282282
br label %for.cond
283283

@@ -362,3 +362,34 @@ for.body:
362362
for.end:
363363
ret void
364364
}
365+
366+
; Make sure we drop poison flags on the mul in the loop.
367+
define i32 @pr85457(i32 %x, i32 %y) {
368+
; CHECK-LABEL: define i32 @pr85457
369+
; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) {
370+
; CHECK-NEXT: entry:
371+
; CHECK-NEXT: [[FACTOR_OP_MUL:%.*]] = mul i32 [[X]], [[Y]]
372+
; CHECK-NEXT: br label [[LOOP:%.*]]
373+
; CHECK: loop:
374+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
375+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
376+
; CHECK-NEXT: [[MUL0:%.*]] = mul i32 [[FACTOR_OP_MUL]], [[IV]]
377+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MUL0]], 1
378+
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
379+
; CHECK: exit:
380+
; CHECK-NEXT: ret i32 0
381+
;
382+
entry:
383+
br label %loop
384+
385+
loop:
386+
%iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
387+
%iv.next = add nuw nsw i32 %iv, 1
388+
%mul0 = mul nuw nsw i32 %x, %iv
389+
%mul1 = mul nuw i32 %mul0, %y
390+
%cmp = icmp slt i32 %mul1, 1
391+
br i1 %cmp, label %exit, label %loop
392+
393+
exit:
394+
ret i32 0
395+
}

0 commit comments

Comments
 (0)