Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2459,7 +2459,8 @@ class VPPartialReductionRecipe : public VPSingleDefRecipe {
~VPPartialReductionRecipe() override = default;

/// Create a new VPPartialReductionRecipe with the same opcode and the same
/// two operands as this one. The underlying instruction of this recipe is
/// forwarded to the new recipe as well.
VPPartialReductionRecipe *clone() override {
  // Pass getUnderlyingInstr() along: the previous three-argument call did not
  // hand the underlying instruction to the cloned recipe.
  return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
                                      getUnderlyingInstr());
}

VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
Expand Down
68 changes: 66 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: opt -mattr=+neon,+dotprod -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mattr=+neon,+dotprod -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -disable-output %s 2>&1 | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-unknown-elf"
Expand Down Expand Up @@ -70,7 +70,71 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[EXTRACT]]> from middle.block)
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' {
; CHECK-NEXT: Live-in ir<[[EP_VFxUF:.+]]> = VF * UF
; CHECK-NEXT: Live-in ir<[[EP_VEC_TC:.+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.main.loop.iter.check>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.main.loop.iter.check>:
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.ph>:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: SCALAR-PHI vp<[[EP_IV:%.+]]> = phi ir<0>, vp<%index.next>
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4)
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[EP_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b>
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%mul>, ir<%accum>
; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<middle.block>:
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, ir<1024>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[EXTRACT]]> from ir-bb<middle.block>)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<scalar.ph>:
; CHECK-NEXT: EMIT vp<[[EP_RESUME:%.+]]> = resume-phi ir<1024>, ir<0>
; CHECK-NEXT: EMIT vp<[[EP_MERGE:%.+]]> = resume-phi vp<[[RED_RESULT]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT: IR %accum = phi i32 [ 0, %scalar.ph ], [ %add, %for.body ] (extra operand: vp<[[EP_MERGE]]> from ir-bb<scalar.ph>)
; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i32
; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i32
; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
; CHECK-NEXT: IR %add = add i32 %mul, %accum
; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
entry:
br label %for.body

Expand Down
Loading