Skip to content

Commit 683b00b

Browse files
committed
[VPlan] Limit VPScalarIVSteps to step == 1 in getSCEVExprForVPValue.
For now, just support VPScalarIVSteps with step == 1 in getSCEVExprForVPValue. This fixes a crash when the step would be != 1.
1 parent 73b092f commit 683b00b

File tree

2 files changed: +110 -1 lines changed

2 files changed: +110 -1 lines changed

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,8 @@ const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
108108
.Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) {
109109
const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L);
110110
const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L);
111-
if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step))
111+
if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step) ||
112+
!Step->isOne())
112113
return SE.getCouldNotCompute();
113114
return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()),
114115
Step);

llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -660,6 +660,114 @@ exit:
660660
ret i32 %red
661661
}
662662

663+
664+
define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
665+
; CHECK-LABEL: define i32 @test_or_reduction_with_stride_2(
666+
; CHECK-SAME: i32 [[SCALE:%.*]], ptr [[SRC:%.*]]) {
667+
; CHECK-NEXT: [[ENTRY:.*:]]
668+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
669+
; CHECK: [[VECTOR_PH]]:
670+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[SCALE]], i64 0
671+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
672+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
673+
; CHECK: [[VECTOR_BODY]]:
674+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
675+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
676+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
677+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
678+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
679+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
680+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
681+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
682+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
683+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
684+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
685+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
686+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
687+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
688+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
689+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
690+
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
691+
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
692+
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
693+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
694+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]]
695+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]]
696+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]]
697+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP4]]
698+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP5]]
699+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP6]]
700+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP7]]
701+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP8]]
702+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP9]]
703+
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP10]]
704+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP11]]
705+
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]]
706+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]]
707+
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]]
708+
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]]
709+
; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1
710+
; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1
711+
; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1
712+
; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1
713+
; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1
714+
; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1
715+
; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1
716+
; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1
717+
; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1
718+
; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1
719+
; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1
720+
; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1
721+
; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1
722+
; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1
723+
; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1
724+
; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1
725+
; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0
726+
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1
727+
; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2
728+
; CHECK-NEXT: [[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP35]], i32 3
729+
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP36]], i32 4
730+
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP37]], i32 5
731+
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP38]], i32 6
732+
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP39]], i32 7
733+
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP40]], i32 8
734+
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP41]], i32 9
735+
; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x i8> [[TMP57]], i8 [[TMP42]], i32 10
736+
; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i8> [[TMP58]], i8 [[TMP43]], i32 11
737+
; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP44]], i32 12
738+
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP45]], i32 13
739+
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP46]], i32 14
740+
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP47]], i32 15
741+
; CHECK-NEXT: [[TMP64:%.*]] = sext <16 x i8> [[TMP63]] to <16 x i32>
742+
; CHECK-NEXT: [[TMP65:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT]], [[TMP64]]
743+
; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]]
744+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
745+
; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48
746+
; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
747+
; CHECK: [[MIDDLE_BLOCK]]:
748+
; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]])
749+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
750+
; CHECK: [[SCALAR_PH]]:
751+
;
752+
entry:
753+
br label %loop
754+
755+
loop:
756+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
757+
%reduction = phi i32 [ %reduction.next, %loop ], [ 0, %entry ]
758+
%gep = getelementptr [32 x i8], ptr %src, i64 %iv
759+
%load = load i8, ptr %gep, align 1
760+
%sext = sext i8 %load to i32
761+
%mul = mul i32 %scale, %sext
762+
%reduction.next = or i32 %mul, %reduction
763+
%iv.next = add i64 %iv, 2
764+
%cmp = icmp eq i64 %iv.next, 100
765+
br i1 %cmp, label %exit, label %loop
766+
767+
exit:
768+
ret i32 %reduction.next
769+
}
770+
663771
attributes #0 = { "target-cpu"="neoverse-512tvb" }
664772

665773
!0 = !{!1, !2, i64 0}

0 commit comments

Comments
 (0)