-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Simplify BLEND %a, %b, NOT(%m) -> BLEND %b, %a, %m. #128375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a69e89a
112c494
6b0f436
c77792e
06de98a
ea4d37f
2e0ea61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -851,8 +851,20 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |||||||
| return; | ||||||||
| } | ||||||||
|
|
||||||||
| if (Blend->isNormalized()) | ||||||||
| if (Blend->isNormalized()) { | ||||||||
|
||||||||
| /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask. | ||||||||
| VPValue *NewMask; | ||||||||
| if (Blend->getNumOperands() == 3 && | ||||||||
| match(Blend->getMask(1), m_Not(m_VPValue(NewMask)))) { | ||||||||
| VPValue *Inc0 = Blend->getIncomingValue(0); | ||||||||
| VPValue *Inc1 = Blend->getIncomingValue(1); | ||||||||
| Blend->setOperand(0, Inc1); | ||||||||
| Blend->setOperand(1, Inc0); | ||||||||
| Blend->setOperand(2, NewMask); | ||||||||
|
||||||||
| Blend->setOperand(2, NewMask); | |
| Blend->setOperand(2, NewMask); | |
| recursivelyDeleteDeadRecipes(OldMask); |
or check if unused and delete, knowing its operand is live? Although it does seem to get collected eventually.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, although there are no more opportunities from the manual cleanup.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,16 +36,16 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % | |
| ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[PB]], i32 [[OFFSET_IDX5]] | ||
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 | ||
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[NEXT_GEP6]], align 4 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD7]], zeroinitializer | ||
| ; CHECK-NEXT: [[DOTNOT9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast une <4 x float> [[WIDE_LOAD]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast une <4 x float> [[WIDE_LOAD7]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> splat (i1 true), <4 x i1> [[TMP19]] | ||
|
Comment on lines
+39
to
+41
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This NOTing of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may not be profitable, depending on the cost to materialize After LV, we get |
||
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD]]) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD7]]) | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[TMP5]], [[TMP4]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]]) | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> splat (float -0.000000e+00), <4 x float> [[TMP9]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[TMP9]], <4 x float> splat (float -0.000000e+00) | ||
| ; CHECK-NEXT: [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 | ||
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -272,9 +272,8 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { | |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
| ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> zeroinitializer, <4 x i64> [[TMP5]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 | ||
| ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4 | ||
|
|
@@ -364,8 +363,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { | |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]] | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0 | ||
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]] | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3 | ||
| ; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8 | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
|
|
@@ -479,9 +477,8 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { | |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
| ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> zeroinitializer, <4 x i64> [[TMP5]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 | ||
| ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4 | ||
|
|
@@ -604,7 +601,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { | |
| ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]] | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0 | ||
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> [[WIDE_LOAD]], <4 x i8> zeroinitializer | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (here the NOTed mask TMP2 has other uses so remains) |
||
| ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0 | ||
| ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,10 +47,8 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { | |
| ; CHECK: [[PRED_UREM_CONTINUE6]]: | ||
| ; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0) | ||
| ; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0) | ||
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 | ||
| ; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1 | ||
| ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 | ||
| ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1 | ||
| ; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP12]] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Old mask TMP14 turns out to be NOT C.. and same for TMP15.) |
||
| ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1 | ||
| ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,7 +71,7 @@ for.end: | |
|
|
||
| ;CHECK-LABEL: @reduction_func( | ||
| ;CHECK: load <4 x i32> | ||
| ;CHECK: icmp slt <4 x i32> | ||
| ;CHECK: icmp sgt <4 x i32> | ||
|
||
| ;CHECK: add <4 x i32> | ||
| ;CHECK: select <4 x i1> | ||
| ;CHECK: ret i32 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unrelated, while we're here: above optimization checks for a single unique (non masked out) value, can hold a pointer rather than a SmallPtrSet. OTOH, can filter away all masked out values, even if more than one remains.