diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp index dd24cc3d98cf8..2f7d7087ca880 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp @@ -230,11 +230,13 @@ bool Scheduler::trySchedule(ArrayRef Instrs) { // top-most part of the schedule that includes the instrs in the bundle and // re-schedule. trimSchedule(Instrs); + ScheduleTopItOpt = std::nullopt; [[fallthrough]]; case BndlSchedState::NoneScheduled: { // TODO: Set the window of the DAG that we are interested in. - // We start scheduling at the bottom instr of Instrs. - ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator()); + if (!ScheduleTopItOpt) + // We start scheduling at the bottom instr of Instrs. + ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator()); // TODO: For now don't cross BBs. if (!DAG.getInterval().empty()) { @@ -262,6 +264,12 @@ bool Scheduler::trySchedule(ArrayRef Instrs) { void Scheduler::dump(raw_ostream &OS) const { OS << "ReadyList:\n"; ReadyList.dump(OS); + OS << "Top of schedule: "; + if (ScheduleTopItOpt) + OS << **ScheduleTopItOpt; + else + OS << "Empty"; + OS << "\n"; } void Scheduler::dump() const { dump(dbgs()); } #endif // NDEBUG diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll index ee8592c04b62c..45b937dc1b1b6 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll @@ -77,7 +77,7 @@ define void @store_fadd_load(ptr %ptr) { ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 ; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 ; CHECK-NEXT: [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 -; CHECK-NEXT: [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]] +; CHECK-NEXT: [[VEC:%.*]] = fadd <2 x float> [[VECL1]], [[VECL]] ; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4 ; CHECK-NEXT: ret void ; @@ -247,8 +247,8 @@ define void @diamondMultiInput(ptr %ptr, ptr %ptrX) { ; CHECK-LABEL: define void @diamondMultiInput( ; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]]) { ; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 -; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 ; CHECK-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4 +; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 ; CHECK-NEXT: [[VINS:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0 ; CHECK-NEXT: [[VEXT:%.*]] = extractelement <2 x float> [[VECL]], i32 0 ; CHECK-NEXT: [[VINS1:%.*]] = insertelement <2 x float> [[VINS]], float [[VEXT]], i32 1 diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll index f1c6e3297d79c..1b189831569f5 100644 --- a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll +++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll @@ -7,8 +7,8 @@ define void @pow2(ptr %ptr, float %val) { ; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) { ; POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 ; POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2 -; POW2-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 ; POW2-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4 +; POW2-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4 ; POW2-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4 ; POW2-NEXT: store float [[LD2]], ptr [[PTR2]], align 4 ; POW2-NEXT: ret void diff --git a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll index 25d9d79154d35..add762ac2d894 100644 --- a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll +++ b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll @@ -5,10 +5,10 @@ define i32 @repeated_splat(ptr %ptr, i32 %v) #0 { ; CHECK-LABEL: define i32 @repeated_splat( ; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) { ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0 -; CHECK-NEXT: [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4 ; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0 ; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x i32> poison, i32 [[SPLAT]], i32 0 ; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x i32> [[PACK]], i32 [[SPLAT]], i32 1 +; CHECK-NEXT: [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4 ; CHECK-NEXT: [[VEC:%.*]] = mul <2 x i32> [[VECL]], [[PACK1]] ; CHECK-NEXT: store <2 x i32> [[VEC]], ptr [[GEP0]], align 4 ; CHECK-NEXT: ret i32 0 @@ -31,6 +31,7 @@ define i32 @repeated_partial(ptr %ptr, i32 %v) #0 { ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 1 ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 3 +; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0 ; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[GEP0]], align 4 ; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[LD3:%.*]] = load i32, ptr [[GEP3]], align 4 @@ -39,7 +40,6 @@ define i32 @repeated_partial(ptr %ptr, i32 %v) #0 { ; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[LD1]], i32 2 ; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[LD3]], i32 3 ; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4 -; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0 ; CHECK-NEXT: [[VEC:%.*]] = mul <4 x i32> [[VECL]], [[PACK3]] ; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[GEP0]], align 4 ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll index 92a78a979192b..acbec80db6b06 100644 --- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll +++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll @@ -49,3 +49,28 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) { store i32 %add21, ptr %arrayidx23 ret void } + +; This used to generate use-before-def because of a buggy update of the +; top-of-schedule variable. +define <4 x float> @check_top_of_schedule(ptr %0) { +; CHECK-LABEL: define <4 x float> @check_top_of_schedule( +; CHECK-SAME: ptr [[TMP0:%.*]]) { +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0 +; CHECK-NEXT: [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1 +; CHECK-NEXT: store <2 x double> , ptr [[GEP_1]], align 8 +; CHECK-NEXT: ret <4 x float> [[INS_2]] +; + %trunc.1 = fptrunc double 0.000000e+00 to float + %trunc.2 = fptrunc double 1.000000e+00 to float + %ins.1 = insertelement <4 x float> zeroinitializer, float poison, i64 0 + %ins.2 = insertelement <4 x float> %ins.1, float %trunc.1, i64 0 + %ext.1 = fpext float %trunc.1 to double + %gep.1 = getelementptr double, ptr %0, i64 1 + store double %ext.1, ptr %gep.1, align 8 + %ext.2 = fpext float %trunc.2 to double + %gep.2 = getelementptr double, ptr %0, i64 2 + store double %ext.2, ptr %gep.2, align 8 + ret <4 x float> %ins.2 +}