-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Simplify BLEND %a, %b, NOT(%m) -> BLEND %b, %a, %m. #128375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
a69e89a
112c494
6b0f436
c77792e
06de98a
ea4d37f
2e0ea61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,16 +36,16 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 % | |
| ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[PB]], i32 [[OFFSET_IDX5]] | ||
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 | ||
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[NEXT_GEP6]], align 4 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD7]], zeroinitializer | ||
| ; CHECK-NEXT: [[DOTNOT9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast une <4 x float> [[WIDE_LOAD]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast une <4 x float> [[WIDE_LOAD7]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> splat (i1 true), <4 x i1> [[TMP19]] | ||
|
Comment on lines
+39
to
+41
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This NOTing of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may not be profitable, depending on the cost to materialize After LV, we get |
||
| ; CHECK-NEXT: [[TMP4:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD]]) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD7]]) | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[TMP5]], [[TMP4]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]]) | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> splat (float -0.000000e+00), <4 x float> [[TMP9]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP20]], <4 x float> [[TMP9]], <4 x float> splat (float -0.000000e+00) | ||
| ; CHECK-NEXT: [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 | ||
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -272,9 +272,8 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { | |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
| ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> zeroinitializer, <4 x i64> [[TMP5]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 | ||
| ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4 | ||
|
|
@@ -364,8 +363,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { | |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]] | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0 | ||
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]] | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3 | ||
| ; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8 | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
|
|
@@ -479,9 +477,8 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { | |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
| ; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2) | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> zeroinitializer, <4 x i64> [[TMP5]] | ||
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 | ||
| ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4 | ||
|
|
@@ -604,7 +601,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { | |
| ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]] | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0 | ||
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1 | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i8> [[WIDE_LOAD]], <4 x i8> zeroinitializer | ||
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (here the NOTed mask TMP2 has other uses so remains) |
||
| ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0 | ||
| ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,10 +47,8 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { | |
| ; CHECK: [[PRED_UREM_CONTINUE6]]: | ||
| ; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0) | ||
| ; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0) | ||
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 | ||
| ; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1 | ||
| ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 | ||
| ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1 | ||
| ; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP12]] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Old mask TMP14 turns out to be NOT C.. and same for TMP15.) |
||
| ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] | ||
| ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1 | ||
| ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1 | ||
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For consistency, this should all use getMask(1)/setMask(1), getIncomingValue(0,1)/setIncomingValue(0,1), with setters to be added, or all use getOperand(0,1,2)/setOperand(0,1,2) as in the following.
BTW, VPBlendRecipe could be a VPInstruction, or a derivative of it - which provides additional methods (but not fields).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to use getOperand consistently for now, thanks!