-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LV] Fix cost misaligned when gather/scatter w/ addr is uniform. #157387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
8d7aea3
70936c0
3b3cc6e
b57e641
1fcac6d
f62c348
7b7b486
f1a6c9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -6905,6 +6905,17 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan, | |||||
| if (isa<VPPartialReductionRecipe>(&R)) | ||||||
| return true; | ||||||
|
|
||||||
| // The VPlan-based cost model can analyze whether recipes are scalar | ||||||
| // recursively, but legacy cost model cannot. | ||||||
|
||||||
| // recursively, but legacy cost model cannot. | |
| // recursively, but the legacy cost model cannot. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still pending.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, missed that. Fixed, thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this use isLegacyUniformAfterVectorization, as the code does below?
| CostCtx.CM.Legal->isUniform( | |
| CostCtx.isLegacyUniformAfterVectorization( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -184,3 +184,73 @@ loop: | |
| exit: | ||
| ret void | ||
| } | ||
|
|
||
| define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p1, ptr noalias %p2, ptr %p3, i64 %N) { | ||
| ; CHECK-LABEL: @store_to_addr_generated_from_invariant_addr( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 | ||
| ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
| ; CHECK: vector.ph: | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = mul <vscale x 2 x i64> [[TMP1]], splat (i64 1) | ||
| ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP2]] | ||
| ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] | ||
| ; CHECK: vector.body: | ||
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]] | ||
| ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> zeroinitializer, <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]] | ||
| ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) | ||
| ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) | ||
| ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64 | ||
| ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] | ||
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 | ||
| ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] | ||
| ; CHECK: middle.block: | ||
| ; CHECK-NEXT: br label [[EXIT:%.*]] | ||
| ; CHECK: scalar.ph: | ||
| ; CHECK-NEXT: br label [[LOOP:%.*]] | ||
| ; CHECK: loop: | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] | ||
| ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P1]], i64 [[IV]] | ||
| ; CHECK-NEXT: store ptr null, ptr [[ARRAYIDX11]], align 8 | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[P2]], align 4 | ||
| ; CHECK-NEXT: [[BITS_TO_GO:%.*]] = getelementptr i8, ptr [[P3]], i64 [[TMP10]] | ||
| ; CHECK-NEXT: store i32 0, ptr [[BITS_TO_GO]], align 4 | ||
| ; CHECK-NEXT: store i32 0, ptr [[BITS_TO_GO]], align 4 | ||
| ; CHECK-NEXT: store i8 0, ptr [[BITS_TO_GO]], align 1 | ||
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 | ||
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]] | ||
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] | ||
| ; CHECK: exit: | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| entry: | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
| %arrayidx11 = getelementptr i32, ptr %p1, i64 %iv | ||
| store ptr null, ptr %arrayidx11, align 8 | ||
|
||
| %0 = load i64, ptr %p2, align 4 | ||
| %bits_to_go = getelementptr i8, ptr %p3, i64 %0 | ||
| store i32 0, ptr %bits_to_go, align 4 | ||
| store i32 0, ptr %bits_to_go, align 4 | ||
| store i8 0, ptr %bits_to_go, align 1 | ||
| %iv.next = add i64 %iv, 1 | ||
| %exitcond.not = icmp eq i64 %iv, %N | ||
| br i1 %exitcond.not, label %exit, label %loop | ||
|
|
||
| exit: | ||
| ret void | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed, thanks!