-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[VPlan] Handle FirstActiveLane when unrolling. #145394
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
badd922
e80224d
f4e7f8a
f2d4f6c
e7a1d64
1bdc736
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -821,9 +821,32 @@ Value *VPInstruction::generate(VPTransformState &State) { | |||||||||||||||||||||||||
| return Builder.CreateOrReduce(A); | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
| case VPInstruction::FirstActiveLane: { | ||||||||||||||||||||||||||
| Value *Mask = State.get(getOperand(0)); | ||||||||||||||||||||||||||
| return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, | ||||||||||||||||||||||||||
| true, Name); | ||||||||||||||||||||||||||
| if (getNumOperands() == 1) { | ||||||||||||||||||||||||||
| Value *Mask = State.get(getOperand(0)); | ||||||||||||||||||||||||||
| return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, | ||||||||||||||||||||||||||
| true, Name); | ||||||||||||||||||||||||||
|
Comment on lines
+861
to
+862
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
consistent with below. |
||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
| // If there are multiple operands, create a chain of selects to pick the | ||||||||||||||||||||||||||
| // first operand with an active lane and add the number of lanes of the | ||||||||||||||||||||||||||
| // preceding operands. | ||||||||||||||||||||||||||
|
Comment on lines
+864
to
+866
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||
| Value *RuntimeVF = | ||||||||||||||||||||||||||
| getRuntimeVF(State.Builder, State.Builder.getInt64Ty(), State.VF); | ||||||||||||||||||||||||||
|
Comment on lines
+867
to
+868
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Use |
||||||||||||||||||||||||||
| unsigned LastOpIdx = getNumOperands() - 1; | ||||||||||||||||||||||||||
| Value *Res = nullptr; | ||||||||||||||||||||||||||
| for (int Idx = LastOpIdx; Idx >= 0; --Idx) { | ||||||||||||||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can something like this
Suggested change
work instead? |
||||||||||||||||||||||||||
| Value *TrailingZeros = Builder.CreateCountTrailingZeroElems( | ||||||||||||||||||||||||||
| Builder.getInt64Ty(), State.get(getOperand(Idx)), true, Name); | ||||||||||||||||||||||||||
|
Comment on lines
+872
to
+873
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||
| Value *Current = Builder.CreateAdd( | ||||||||||||||||||||||||||
| Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros); | ||||||||||||||||||||||||||
|
Comment on lines
+874
to
+875
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
splitting is easier to read, as in cmp-select below? |
||||||||||||||||||||||||||
| if (Res) { | ||||||||||||||||||||||||||
| Value *Cmp = Builder.CreateICmpNE(TrailingZeros, RuntimeVF); | ||||||||||||||||||||||||||
| Res = Builder.CreateSelect(Cmp, Current, Res); | ||||||||||||||||||||||||||
| } else { | ||||||||||||||||||||||||||
| Res = Current; | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
Comment on lines
+876
to
+881
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| return Res; | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
| default: | ||||||||||||||||||||||||||
| llvm_unreachable("Unsupported opcode for instruction"); | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,11 +31,38 @@ define i64 @same_exit_block_pre_inc_use1() #0 { | |
| ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16 | ||
| ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP19]] | ||
| ; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 32 | ||
| ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP34]] | ||
| ; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], 48 | ||
| ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP37]] | ||
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1 | ||
| ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1 | ||
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP35]], align 1 | ||
| ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP38]], align 1 | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 | ||
| ; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16 | ||
| ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP21]] | ||
| ; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 32 | ||
| ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP24]] | ||
| ; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 48 | ||
| ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP27]] | ||
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1 | ||
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 16 x i8>, ptr [[TMP22]], align 1 | ||
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 16 x i8>, ptr [[TMP25]], align 1 | ||
| ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1 | ||
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] | ||
| ; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD5]], [[WIDE_LOAD6]] | ||
| ; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]] | ||
| ; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD8]] | ||
|
||
| ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]] | ||
| ; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP11]]) | ||
| ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]] | ||
|
|
@@ -47,8 +74,27 @@ define i64 @same_exit_block_pre_inc_use1() #0 { | |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 510, [[N_VEC]] | ||
| ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]] | ||
| ; CHECK: vector.early.exit: | ||
| ; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP63]], 16 | ||
| ; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true) | ||
| ; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP42]], 3 | ||
| ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[TMP62]], [[TMP44]] | ||
| ; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP31]], i1 true) | ||
| ; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP42]], 2 | ||
| ; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[TMP58]], [[TMP46]] | ||
| ; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP46]], [[TMP42]] | ||
| ; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP47]], i64 [[TMP50]], i64 [[TMP45]] | ||
| ; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP30]], i1 true) | ||
| ; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[TMP42]], 1 | ||
| ; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP64]], [[TMP52]] | ||
| ; CHECK-NEXT: [[TMP53:%.*]] = icmp ne i64 [[TMP52]], [[TMP42]] | ||
| ; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP53]], i64 [[TMP56]], i64 [[TMP51]] | ||
| ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP11]], i1 true) | ||
| ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP15]] | ||
| ; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP42]], 0 | ||
| ; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[TMP65]], [[TMP15]] | ||
| ; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP15]], [[TMP42]] | ||
| ; CHECK-NEXT: [[TMP61:%.*]] = select i1 [[TMP59]], i64 [[TMP60]], i64 [[TMP57]] | ||
| ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP61]] | ||
| ; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]] | ||
| ; CHECK-NEXT: br label [[LOOP_END]] | ||
| ; CHECK: scalar.ph: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to be reused in multiple cases below.