Skip to content

Commit 076b8a1

Browse files
committed
[InstCombine] Tighten use constraint in factorization transforms
1 parent 0c308e7 commit 076b8a1

File tree

9 files changed

+79
-67
lines changed

9 files changed

+79
-67
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,6 +1424,12 @@ static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
14241424
!match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt))))
14251425
return nullptr;
14261426

1427+
// This transform is only profitable if both operations or one operation and
1428+
// the resulting add/sub can be eliminated/folded.
1429+
if (!(Op0->hasOneUse() && Op1->hasOneUse()) &&
1430+
!(isa<Constant>(X) && isa<Constant>(Y)))
1431+
return nullptr;
1432+
14271433
// No-wrap propagates only when all ops have no-wrap.
14281434
bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() &&
14291435
Op1->hasNoSignedWrap();

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -688,9 +688,12 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
688688
// If "B op D" simplifies then it can be formed with no cost.
689689
V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
690690

691-
// If "B op D" doesn't simplify then only go on if one of the existing
691+
// If "B op D" doesn't simplify then only go on if both of the existing
692692
// operations "A op' B" and "C op' D" will be zapped as no longer used.
693-
if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
693+
// Note that when an operation is equal to one of its operands, that
694+
// operation is "zapped" by having never existed in the first place.
695+
if (!V && (LHS->hasOneUse() || LHS == A || LHS == B) &&
696+
(RHS->hasOneUse() || RHS == C || RHS == D))
694697
V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
695698
if (V)
696699
RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
@@ -708,9 +711,12 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
708711
// If "A op C" simplifies then it can be formed with no cost.
709712
V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
710713

711-
// If "A op C" doesn't simplify then only go on if one of the existing
714+
// If "A op C" doesn't simplify then only go on if both of the existing
712715
// operations "A op' B" and "C op' D" will be zapped as no longer used.
713-
if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
716+
// Note that when an operation is equal to one of its operands, that
717+
// operation is "zapped" by having never existed in the first place.
718+
if (!V && (LHS->hasOneUse() || LHS == A || LHS == B) &&
719+
(RHS->hasOneUse() || RHS == C || RHS == D))
714720
V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
715721
if (V)
716722
RetVal = Builder.CreateBinOp(InnerOpcode, V, B);

llvm/test/Transforms/InstCombine/add.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,9 +2013,9 @@ define i8 @mul_add_common_factor_use(i8 %x, i8 %y) {
20132013
define i8 @mul_add_common_factor_use2(i8 %x, i8 %y, i8 %z) {
20142014
; CHECK-LABEL: @mul_add_common_factor_use2(
20152015
; CHECK-NEXT: [[M:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]]
2016+
; CHECK-NEXT: [[N:%.*]] = mul i8 [[X]], [[Z:%.*]]
20162017
; CHECK-NEXT: call void @use(i8 [[M]])
2017-
; CHECK-NEXT: [[N1:%.*]] = add i8 [[Y]], [[Z:%.*]]
2018-
; CHECK-NEXT: [[A:%.*]] = mul i8 [[N1]], [[X]]
2018+
; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], [[N]]
20192019
; CHECK-NEXT: ret i8 [[A]]
20202020
;
20212021
%m = mul i8 %x, %y

llvm/test/Transforms/InstCombine/and-or.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -685,11 +685,11 @@ define i32 @or_or_and_noOneUse_fail1(i32 %a, i32 %b) {
685685
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A:%.*]], 23
686686
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 157
687687
; CHECK-NEXT: call void @use2(i32 [[AND]])
688-
; CHECK-NEXT: [[AND1:%.*]] = or i32 [[B:%.*]], 157
689-
; CHECK-NEXT: [[OR:%.*]] = and i32 [[SHR]], [[AND1]]
690-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[B]], 23
691-
; CHECK-NEXT: [[AND9:%.*]] = and i32 [[TMP1]], 157
692-
; CHECK-NEXT: [[R:%.*]] = or i32 [[OR]], [[AND9]]
688+
; CHECK-NEXT: [[AND3:%.*]] = and i32 [[SHR]], [[B:%.*]]
689+
; CHECK-NEXT: [[SHR8:%.*]] = lshr i32 [[B]], 23
690+
; CHECK-NEXT: [[AND9:%.*]] = and i32 [[SHR8]], 157
691+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND3]], [[AND9]]
692+
; CHECK-NEXT: [[R:%.*]] = or i32 [[TMP1]], [[AND]]
693693
; CHECK-NEXT: ret i32 [[R]]
694694
;
695695
%shr = ashr i32 %a, 23
@@ -714,9 +714,9 @@ define { i1, i1, i1, i1, i1 } @or_or_and_noOneUse_fail2(i1 %a_0, i1 %a_1, i1 %a_
714714
; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[A_1:%.*]], [[B_1:%.*]]
715715
; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true
716716
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP0]], [[A_1]]
717-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[A_1]]
718-
; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP6]], [[B_1]]
719-
; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP5]]
717+
; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[B_1]]
718+
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]]
719+
; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP3]]
720720
; CHECK-NEXT: [[DOTNOT1:%.*]] = or i1 [[TMP1]], [[TMP3]]
721721
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { i1, i1, i1, i1, i1 } zeroinitializer, i1 [[D]], 0
722722
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i1, i1, i1, i1, i1 } [[TMP8]], i1 [[TMP4]], 1

llvm/test/Transforms/InstCombine/ctpop.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,9 @@ define i32 @parity_xor_extra_use(i32 %arg, i32 %arg1) {
442442
; CHECK-NEXT: [[I:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG:%.*]])
443443
; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 1
444444
; CHECK-NEXT: tail call void @use(i32 [[I2]])
445-
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG]]
446-
; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[TMP1]])
447-
; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP2]], 1
445+
; CHECK-NEXT: [[I3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]])
446+
; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1
447+
; CHECK-NEXT: [[I5:%.*]] = xor i32 [[I4]], [[I2]]
448448
; CHECK-NEXT: ret i32 [[I5]]
449449
;
450450
%i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -461,9 +461,9 @@ define i32 @parity_xor_extra_use2(i32 %arg, i32 %arg1) {
461461
; CHECK-NEXT: [[I:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]])
462462
; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 1
463463
; CHECK-NEXT: tail call void @use(i32 [[I2]])
464-
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1]], [[ARG:%.*]]
465-
; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[TMP1]])
466-
; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP2]], 1
464+
; CHECK-NEXT: [[I3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[ARG:%.*]])
465+
; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1
466+
; CHECK-NEXT: [[I5:%.*]] = xor i32 [[I2]], [[I4]]
467467
; CHECK-NEXT: ret i32 [[I5]]
468468
;
469469
%i = tail call i32 @llvm.ctpop.i32(i32 %arg1)

llvm/test/Transforms/InstCombine/shl-factor.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ define i8 @add_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
4343
; CHECK-LABEL: @add_shl_same_amount_nsw_extra_use1(
4444
; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]]
4545
; CHECK-NEXT: call void @use8(i8 [[XS]])
46-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i8 [[X]], [[Y:%.*]]
47-
; CHECK-NEXT: [[DIFF:%.*]] = shl nsw i8 [[TMP1]], [[Z]]
46+
; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
47+
; CHECK-NEXT: [[DIFF:%.*]] = add nsw i8 [[XS]], [[YS]]
4848
; CHECK-NEXT: ret i8 [[DIFF]]
4949
;
5050
%xs = shl nsw nuw i8 %x, %z
@@ -56,10 +56,10 @@ define i8 @add_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
5656

5757
define i8 @add_shl_same_amount_nuw_extra_use2(i8 %x, i8 %y, i8 %z) {
5858
; CHECK-LABEL: @add_shl_same_amount_nuw_extra_use2(
59-
; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z:%.*]]
59+
; CHECK-NEXT: [[XS:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]]
60+
; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
6061
; CHECK-NEXT: call void @use8(i8 [[YS]])
61-
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[X:%.*]], [[Y]]
62-
; CHECK-NEXT: [[DIFF:%.*]] = shl nuw i8 [[TMP1]], [[Z]]
62+
; CHECK-NEXT: [[DIFF:%.*]] = add nuw nsw i8 [[XS]], [[YS]]
6363
; CHECK-NEXT: ret i8 [[DIFF]]
6464
;
6565
%xs = shl nuw i8 %x, %z
@@ -174,8 +174,8 @@ define i8 @sub_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
174174
; CHECK-LABEL: @sub_shl_same_amount_nsw_extra_use1(
175175
; CHECK-NEXT: [[XS:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]]
176176
; CHECK-NEXT: call void @use8(i8 [[XS]])
177-
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i8 [[X]], [[Y:%.*]]
178-
; CHECK-NEXT: [[DIFF:%.*]] = shl nsw i8 [[TMP1]], [[Z]]
177+
; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
178+
; CHECK-NEXT: [[DIFF:%.*]] = sub nsw i8 [[XS]], [[YS]]
179179
; CHECK-NEXT: ret i8 [[DIFF]]
180180
;
181181
%xs = shl nsw nuw i8 %x, %z
@@ -187,10 +187,10 @@ define i8 @sub_shl_same_amount_nsw_extra_use1(i8 %x, i8 %y, i8 %z) {
187187

188188
define i8 @sub_shl_same_amount_nuw_extra_use2(i8 %x, i8 %y, i8 %z) {
189189
; CHECK-LABEL: @sub_shl_same_amount_nuw_extra_use2(
190-
; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z:%.*]]
190+
; CHECK-NEXT: [[XS:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]]
191+
; CHECK-NEXT: [[YS:%.*]] = shl nuw nsw i8 [[Y:%.*]], [[Z]]
191192
; CHECK-NEXT: call void @use8(i8 [[YS]])
192-
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw i8 [[X:%.*]], [[Y]]
193-
; CHECK-NEXT: [[DIFF:%.*]] = shl nuw i8 [[TMP1]], [[Z]]
193+
; CHECK-NEXT: [[DIFF:%.*]] = sub nuw nsw i8 [[XS]], [[YS]]
194194
; CHECK-NEXT: ret i8 [[DIFF]]
195195
;
196196
%xs = shl nuw i8 %x, %z

llvm/test/Transforms/LoopVectorize/induction.ll

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3910,8 +3910,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
39103910
; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 510
39113911
; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
39123912
; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTCAST]], [[T]]
3913-
; IND-NEXT: [[EXT_MUL5:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
3914-
; IND-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL5]], 2
3913+
; IND-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[N_VEC]], 2
3914+
; IND-NEXT: [[IND_END1:%.*]] = add nuw nsw i32 [[EXT_MUL]], [[TMP10]]
39153915
; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
39163916
; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
39173917
; IND-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
@@ -3921,13 +3921,13 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
39213921
; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
39223922
; IND-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
39233923
; IND-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST4]], [[T]]
3924-
; IND-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3925-
; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
3926-
; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
3924+
; IND-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3925+
; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
3926+
; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP12]], align 4
39273927
; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
39283928
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
3929-
; IND-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3930-
; IND-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
3929+
; IND-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3930+
; IND-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
39313931
; IND: middle.block:
39323932
; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
39333933
; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -3940,8 +3940,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
39403940
; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
39413941
; IND-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
39423942
; IND-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
3943-
; IND-NEXT: [[TMP13:%.*]] = sext i8 [[IDX]] to i64
3944-
; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
3943+
; IND-NEXT: [[TMP14:%.*]] = sext i8 [[IDX]] to i64
3944+
; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
39453945
; IND-NEXT: store i32 [[SPHI]], ptr [[PTR]], align 4
39463946
; IND-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
39473947
; IND-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
@@ -3979,8 +3979,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
39793979
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 508
39803980
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
39813981
; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTCAST]], [[T]]
3982-
; UNROLL-NEXT: [[EXT_MUL6:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
3983-
; UNROLL-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL6]], 2
3982+
; UNROLL-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[N_VEC]], 2
3983+
; UNROLL-NEXT: [[IND_END1:%.*]] = add nuw nsw i32 [[EXT_MUL]], [[TMP10]]
39843984
; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
39853985
; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
39863986
; UNROLL-NEXT: [[INDUCTION:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
@@ -3991,15 +3991,15 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
39913991
; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
39923992
; UNROLL-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
39933993
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
3994-
; UNROLL-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3995-
; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
3996-
; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 8
3997-
; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
3998-
; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
3994+
; UNROLL-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3995+
; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
3996+
; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 8
3997+
; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP12]], align 4
3998+
; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP13]], align 4
39993999
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
40004000
; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 16, i32 16>
4001-
; UNROLL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4002-
; UNROLL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4001+
; UNROLL-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4002+
; UNROLL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
40034003
; UNROLL: middle.block:
40044004
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
40054005
; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4012,8 +4012,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
40124012
; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
40134013
; UNROLL-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
40144014
; UNROLL-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4015-
; UNROLL-NEXT: [[TMP14:%.*]] = sext i8 [[IDX]] to i64
4016-
; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
4015+
; UNROLL-NEXT: [[TMP15:%.*]] = sext i8 [[IDX]] to i64
4016+
; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
40174017
; UNROLL-NEXT: store i32 [[SPHI]], ptr [[PTR]], align 4
40184018
; UNROLL-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
40194019
; UNROLL-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
@@ -4129,8 +4129,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
41294129
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], 504
41304130
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
41314131
; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTCAST]], [[T]]
4132-
; INTERLEAVE-NEXT: [[EXT_MUL6:%.*]] = add nuw nsw i32 [[N_VEC]], [[EXT]]
4133-
; INTERLEAVE-NEXT: [[IND_END1:%.*]] = shl nuw nsw i32 [[EXT_MUL6]], 2
4132+
; INTERLEAVE-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[N_VEC]], 2
4133+
; INTERLEAVE-NEXT: [[IND_END1:%.*]] = add nuw nsw i32 [[EXT_MUL]], [[TMP10]]
41344134
; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0
41354135
; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
41364136
; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT]], <i32 0, i32 4, i32 8, i32 12>
@@ -4141,15 +4141,15 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
41414141
; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
41424142
; INTERLEAVE-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[INDEX]] to i8
41434143
; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTCAST5]], [[T]]
4144-
; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4145-
; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
4146-
; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
4147-
; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
4148-
; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
4144+
; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4145+
; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
4146+
; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
4147+
; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP12]], align 4
4148+
; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP13]], align 4
41494149
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
41504150
; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 32, i32 32, i32 32, i32 32>
4151-
; INTERLEAVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4152-
; INTERLEAVE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4151+
; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4152+
; INTERLEAVE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
41534153
; INTERLEAVE: middle.block:
41544154
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
41554155
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -4162,8 +4162,8 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
41624162
; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
41634163
; INTERLEAVE-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
41644164
; INTERLEAVE-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4165-
; INTERLEAVE-NEXT: [[TMP14:%.*]] = sext i8 [[IDX]] to i64
4166-
; INTERLEAVE-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
4165+
; INTERLEAVE-NEXT: [[TMP15:%.*]] = sext i8 [[IDX]] to i64
4166+
; INTERLEAVE-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
41674167
; INTERLEAVE-NEXT: store i32 [[SPHI]], ptr [[PTR]], align 4
41684168
; INTERLEAVE-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1
41694169
; INTERLEAVE-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32

llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -371,9 +371,9 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
371371
; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[J_022]], -1
372372
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[ITR]]
373373
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
374-
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP4]], [[TMP12]]
375-
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
376-
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 [[TMP14]]
374+
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2
375+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 [[TMP9]]
376+
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP13]]
377377
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP3]], [[VAR1]]
378378
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
379379
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]

0 commit comments

Comments
 (0)