Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5256,8 +5256,11 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
const Align Alignment = getLoadStoreAlignment(I);
const Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = toVectorTy(Ptr->getType(), VF);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = Ptr->getType();

if (!Legal->isUniform(Ptr, VF))
PtrTy = toVectorTy(PtrTy, VF);

return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
Expand Down
190 changes: 190 additions & 0 deletions llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+rva23u64 -S | FileCheck %s -check-prefixes=CHECK,RVA23
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+rva23u64,+zvl1024b -S | FileCheck %s -check-prefixes=CHECK,RVA23ZVL1024B

define i32 @getNeighborBoxes(ptr %boxes, i32 %iBox, ptr %nbrBoxes) {
; CHECK-LABEL: @getNeighborBoxes(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[IBOX:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[IBOX]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[NBRBOXES:%.*]], i64 4
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[BOXES:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[NBRBOXES]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[BOXES]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[BOXES]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[NBRBOXES]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ [[TMP3]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <vscale x 4 x i32> [[TMP11]], [[BROADCAST_SPLAT6]]
; CHECK-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT4]], <vscale x 4 x i1> [[TMP15]], i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP10]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_EXIT:%.*]] ]
; CHECK-NEXT: br i1 false, label [[EXIT_CRIT_EDGE:%.*]], label [[IF:%.*]]
; CHECK: exit_crit_edge:
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: if:
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[BOXES]], align 4
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: loop.exit:
; CHECK-NEXT: [[IBOX_0_I:%.*]] = phi i32 [ [[TMP17]], [[IF]] ], [ 0, [[EXIT_CRIT_EDGE]] ]
; CHECK-NEXT: store i32 [[IBOX_0_I]], ptr [[NBRBOXES]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
br label %loop

loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.exit ]
br i1 false, label %exit_crit_edge, label %if
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is a known condition required here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Otherwise it will using fixed VF and not triggering the assertion.


exit_crit_edge:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
exit_crit_edge:
loop.then:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

br label %loop.exit

if:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

loop.else:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks

%0 = load i32, ptr %boxes, align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%0 = load i32, ptr %boxes, align 4
%0 = load i32, ptr %src, align 4

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

br label %loop.exit

loop.exit:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
loop.exit:
loop.latch:

this is not the exit of the loop, but the (exiting) latch in LLVM terminology.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks!

%iBox.0.i = phi i32 [ %0, %if ], [ 0, %exit_crit_edge ]
store i32 %iBox.0.i, ptr %nbrBoxes, align 4
%iv.next = add i32 %iv, 1
%exitcond = icmp sgt i32 %iv, %iBox
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%exitcond = icmp sgt i32 %iv, %iBox
%exitcond = icmp sgt i32 %iv, %n

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

br i1 %exitcond, label %exit, label %loop

exit:
ret i32 0
}

define void @_Z9BM_MemCmpILi7E12LessThanZero5FirstEvRN9benchmark5StateE() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you adjust the names to be more descriptive, instead of using the ones from the original source function? Also above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

; RVA23-LABEL: @_Z9BM_MemCmpILi7E12LessThanZero5FirstEvRN9benchmark5StateE(
; RVA23-NEXT: entry:
; RVA23-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; RVA23: vector.ph:
; RVA23-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
; RVA23-NEXT: [[TMP1:%.*]] = mul <vscale x 16 x i64> [[TMP0]], splat (i64 1)
; RVA23-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> zeroinitializer, [[TMP1]]
; RVA23-NEXT: br label [[VECTOR_BODY:%.*]]
; RVA23: vector.body:
; RVA23-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; RVA23-NEXT: [[AVL:%.*]] = phi i64 [ 586, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; RVA23-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
; RVA23-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; RVA23-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP3]], i64 0
; RVA23-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
; RVA23-NEXT: [[TMP4:%.*]] = mul <vscale x 16 x i64> [[VEC_IND]], splat (i64 7)
; RVA23-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, <vscale x 16 x i64> [[TMP4]]
; RVA23-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x ptr> align 1 [[TMP5]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP2]])
; RVA23-NEXT: [[TMP6:%.*]] = zext i32 [[TMP2]] to i64
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]]
; RVA23-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; RVA23: middle.block:
; RVA23-NEXT: br label [[EXIT:%.*]]
; RVA23: scalar.ph:
; RVA23-NEXT: br label [[LOOP:%.*]]
; RVA23: loop:
; RVA23-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; RVA23-NEXT: [[TMP8:%.*]] = mul i64 [[IV]], 7
; RVA23-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, ptr null, i64 [[TMP8]]
; RVA23-NEXT: store i8 0, ptr [[ADD_PTR]], align 1
; RVA23-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; RVA23-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 585
; RVA23-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; RVA23: exit:
; RVA23-NEXT: ret void
;
; RVA23ZVL1024B-LABEL: @_Z9BM_MemCmpILi7E12LessThanZero5FirstEvRN9benchmark5StateE(
; RVA23ZVL1024B-NEXT: entry:
; RVA23ZVL1024B-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; RVA23ZVL1024B: vector.ph:
; RVA23ZVL1024B-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; RVA23ZVL1024B-NEXT: [[TMP1:%.*]] = mul <vscale x 2 x i64> [[TMP0]], splat (i64 1)
; RVA23ZVL1024B-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP1]]
; RVA23ZVL1024B-NEXT: br label [[VECTOR_BODY:%.*]]
; RVA23ZVL1024B: vector.body:
; RVA23ZVL1024B-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; RVA23ZVL1024B-NEXT: [[AVL:%.*]] = phi i64 [ 586, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; RVA23ZVL1024B-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; RVA23ZVL1024B-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; RVA23ZVL1024B-NEXT: [[TMP4:%.*]] = mul <vscale x 2 x i64> [[VEC_IND]], splat (i64 7)
; RVA23ZVL1024B-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, <vscale x 2 x i64> [[TMP4]]
; RVA23ZVL1024B-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP2]])
; RVA23ZVL1024B-NEXT: [[TMP6:%.*]] = zext i32 [[TMP2]] to i64
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]]
; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; RVA23ZVL1024B: middle.block:
; RVA23ZVL1024B-NEXT: br label [[EXIT:%.*]]
; RVA23ZVL1024B: scalar.ph:
; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]]
; RVA23ZVL1024B: loop:
; RVA23ZVL1024B-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; RVA23ZVL1024B-NEXT: [[TMP8:%.*]] = mul i64 [[IV]], 7
; RVA23ZVL1024B-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, ptr null, i64 [[TMP8]]
; RVA23ZVL1024B-NEXT: store i8 0, ptr [[ADD_PTR]], align 1
; RVA23ZVL1024B-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; RVA23ZVL1024B-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 585
; RVA23ZVL1024B-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; RVA23ZVL1024B: exit:
; RVA23ZVL1024B-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%0 = mul i64 %iv, 7
%add.ptr = getelementptr i8, ptr null, i64 %0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please avoid null, it makes the function have immediate UB on the store

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using non-null value, thanks!

store i8 0, ptr %add.ptr, align 1
%iv.next = add i64 %iv, 1
%exitcond = icmp eq i64 %iv, 585
br i1 %exitcond, label %exit, label %loop

exit:
ret void
}