Skip to content

Commit e9aac24

Browse files
MacDue authored and mikolaj-pirog committed
[InstCombine] Allow folding cross-lane operations into PHIs/selects (llvm#164388)
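Previously, cross-lane operations were never folded into PHIs/selects here. However, they are only problematic when the `select` condition is a vector: in that case the condition chooses each lane independently, so the input of the operation may be a lane-wise mix of both arms rather than simply one of the arms. For a phi, or a `select` with a scalar condition, the input is always exactly one of the arms, so folding a cross-lane operation into it is safe.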
1 parent 415c60d commit e9aac24

File tree

7 files changed

+114
-17
lines changed

7 files changed

+114
-17
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4003,18 +4003,29 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
40034003

40044004
// Try to fold intrinsic into select/phi operands. This is legal if:
40054005
// * The intrinsic is speculatable.
4006-
// * The select condition is not a vector, or the intrinsic does not
4007-
// perform cross-lane operations.
4008-
if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
4009-
isNotCrossLaneOperation(II))
4006+
// * The operand is one of the following:
4007+
// - a phi.
4008+
// - a select with a scalar condition.
4009+
// - a select with a vector condition and II is not a cross lane operation.
4010+
if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
40104011
for (Value *Op : II->args()) {
4011-
if (auto *Sel = dyn_cast<SelectInst>(Op))
4012-
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
4012+
if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4013+
bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4014+
if (IsVectorCond && !isNotCrossLaneOperation(II))
4015+
continue;
4016+
// Don't replace a scalar select with a more expensive vector select if
4017+
// we can't simplify both arms of the select.
4018+
bool SimplifyBothArms =
4019+
!Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4020+
if (Instruction *R = FoldOpIntoSelect(
4021+
*II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
40134022
return R;
4023+
}
40144024
if (auto *Phi = dyn_cast<PHINode>(Op))
40154025
if (Instruction *R = foldOpIntoPhi(*II, Phi))
40164026
return R;
40174027
}
4028+
}
40184029

40194030
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
40204031
return Shuf;

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -664,7 +664,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
664664
/// This also works for Cast instructions, which obviously do not have a
665665
/// second operand.
666666
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
667-
bool FoldWithMultiUse = false);
667+
bool FoldWithMultiUse = false,
668+
bool SimplifyBothArms = false);
668669

669670
/// This is a convenience wrapper function for the above two functions.
670671
Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I);

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1777,7 +1777,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
17771777
}
17781778

17791779
Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1780-
bool FoldWithMultiUse) {
1780+
bool FoldWithMultiUse,
1781+
bool SimplifyBothArms) {
17811782
// Don't modify shared select instructions unless set FoldWithMultiUse
17821783
if (!SI->hasOneUse() && !FoldWithMultiUse)
17831784
return nullptr;
@@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
18211822
if (!NewTV && !NewFV)
18221823
return nullptr;
18231824

1825+
if (SimplifyBothArms && !(NewTV && NewFV))
1826+
return nullptr;
1827+
18241828
// Create an instruction for the arm that did not fold.
18251829
if (!NewTV)
18261830
NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);

llvm/test/Transforms/InstCombine/intrinsic-select.ll

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -222,8 +222,7 @@ declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
222222

223223
define i32 @vec_to_scalar_select_scalar(i1 %b) {
224224
; CHECK-LABEL: @vec_to_scalar_select_scalar(
225-
; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
226-
; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[S]])
225+
; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 3, i32 7
227226
; CHECK-NEXT: ret i32 [[C]]
228227
;
229228
%s = select i1 %b, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
@@ -371,3 +370,36 @@ define float @test_fabs_select_multiuse_both_constant(i1 %cond, float %x) {
371370
%fabs = call float @llvm.fabs.f32(float %select)
372371
ret float %fabs
373372
}
373+
374+
; Negative test: Don't replace with select between vector mask and zeroinitializer.
375+
define <16 x i1> @test_select_of_active_lane_mask_bound(i64 %base, i64 %n, i1 %cond) {
376+
; CHECK-LABEL: @test_select_of_active_lane_mask_bound(
377+
; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i64 [[N:%.*]], i64 0
378+
; CHECK-NEXT: [[MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[BASE:%.*]], i64 [[S]])
379+
; CHECK-NEXT: ret <16 x i1> [[MASK]]
380+
;
381+
%s = select i1 %cond, i64 %n, i64 0
382+
%mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %s)
383+
ret <16 x i1> %mask
384+
}
385+
386+
define <16 x i1> @test_select_of_active_lane_mask_bound_both_constant(i64 %base, i64 %n, i1 %cond) {
387+
; CHECK-LABEL: @test_select_of_active_lane_mask_bound_both_constant(
388+
; CHECK-NEXT: [[MASK:%.*]] = select i1 [[COND:%.*]], <16 x i1> splat (i1 true), <16 x i1> zeroinitializer
389+
; CHECK-NEXT: ret <16 x i1> [[MASK]]
390+
;
391+
%s = select i1 %cond, i64 16, i64 0
392+
%mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 0, i64 %s)
393+
ret <16 x i1> %mask
394+
}
395+
396+
define { i64, i1 } @test_select_of_overflow_intrinsic_operand(i64 %n, i1 %cond) {
397+
; CHECK-LABEL: @test_select_of_overflow_intrinsic_operand(
398+
; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[N:%.*]], i64 42)
399+
; CHECK-NEXT: [[ADD_OVERFLOW:%.*]] = select i1 [[COND:%.*]], { i64, i1 } [[TMP1]], { i64, i1 } { i64 42, i1 false }
400+
; CHECK-NEXT: ret { i64, i1 } [[ADD_OVERFLOW]]
401+
;
402+
%s = select i1 %cond, i64 %n, i64 0
403+
%add_overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s, i64 42)
404+
ret { i64, i1 } %add_overflow
405+
}

llvm/test/Transforms/InstCombine/phi.ll

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3026,6 +3026,56 @@ join:
30263026
ret i32 %umax
30273027
}
30283028

3029+
define i32 @cross_lane_intrinsic_over_phi(i1 %c, i1 %c2, <4 x i32> %a) {
3030+
; CHECK-LABEL: @cross_lane_intrinsic_over_phi(
3031+
; CHECK-NEXT: entry:
3032+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
3033+
; CHECK: if:
3034+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A:%.*]])
3035+
; CHECK-NEXT: br label [[JOIN]]
3036+
; CHECK: join:
3037+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
3038+
; CHECK-NEXT: call void @may_exit()
3039+
; CHECK-NEXT: ret i32 [[PHI]]
3040+
;
3041+
entry:
3042+
br i1 %c, label %if, label %join
3043+
3044+
if:
3045+
br label %join
3046+
3047+
join:
3048+
%phi = phi <4 x i32> [ %a, %if ], [ zeroinitializer, %entry ]
3049+
call void @may_exit()
3050+
%sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %phi)
3051+
ret i32 %sum
3052+
}
3053+
3054+
define { i64, i1 } @overflow_intrinsic_over_phi(i1 %c, i64 %a) {
3055+
; CHECK-LABEL: @overflow_intrinsic_over_phi(
3056+
; CHECK-NEXT: entry:
3057+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
3058+
; CHECK: if:
3059+
; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 1)
3060+
; CHECK-NEXT: br label [[JOIN]]
3061+
; CHECK: join:
3062+
; CHECK-NEXT: [[PHI:%.*]] = phi { i64, i1 } [ [[TMP0]], [[IF]] ], [ { i64 1, i1 false }, [[ENTRY:%.*]] ]
3063+
; CHECK-NEXT: call void @may_exit()
3064+
; CHECK-NEXT: ret { i64, i1 } [[PHI]]
3065+
;
3066+
entry:
3067+
br i1 %c, label %if, label %join
3068+
3069+
if:
3070+
br label %join
3071+
3072+
join:
3073+
%phi = phi i64 [ %a, %if ], [ 0, %entry ]
3074+
call void @may_exit()
3075+
%add_overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %phi, i64 1)
3076+
ret { i64, i1 } %add_overflow
3077+
}
3078+
30293079
define i32 @multiple_intrinsics_with_multiple_phi_uses(i1 %c, i32 %arg) {
30303080
; CHECK-LABEL: @multiple_intrinsics_with_multiple_phi_uses(
30313081
; CHECK-NEXT: entry:

llvm/test/Transforms/InstCombine/select_frexp.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -115,10 +115,10 @@ define float @test_select_frexp_no_const(float %x, float %y, i1 %cond) {
115115
define i32 @test_select_frexp_extract_exp(float %x, i1 %cond) {
116116
; CHECK-LABEL: define i32 @test_select_frexp_extract_exp(
117117
; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) {
118-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], float 1.000000e+00, float [[X]]
119-
; CHECK-NEXT: [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SEL]])
118+
; CHECK-NEXT: [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
120119
; CHECK-NEXT: [[FREXP_1:%.*]] = extractvalue { float, i32 } [[FREXP]], 1
121-
; CHECK-NEXT: ret i32 [[FREXP_1]]
120+
; CHECK-NEXT: [[FREXP_2:%.*]] = select i1 [[COND]], i32 1, i32 [[FREXP_1]]
121+
; CHECK-NEXT: ret i32 [[FREXP_2]]
122122
;
123123
%sel = select i1 %cond, float 1.000000e+00, float %x
124124
%frexp = call { float, i32 } @llvm.frexp.f32.i32(float %sel)
@@ -132,7 +132,7 @@ define float @test_select_frexp_fast_math_select(float %x, i1 %cond) {
132132
; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) {
133133
; CHECK-NEXT: [[FREXP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
134134
; CHECK-NEXT: [[MANTISSA:%.*]] = extractvalue { float, i32 } [[FREXP1]], 0
135-
; CHECK-NEXT: [[SELECT_FREXP:%.*]] = select nnan ninf nsz i1 [[COND]], float 5.000000e-01, float [[MANTISSA]]
135+
; CHECK-NEXT: [[SELECT_FREXP:%.*]] = select i1 [[COND]], float 5.000000e-01, float [[MANTISSA]]
136136
; CHECK-NEXT: ret float [[SELECT_FREXP]]
137137
;
138138
%sel = select nnan ninf nsz i1 %cond, float 1.000000e+00, float %x

llvm/test/Transforms/PhaseOrdering/always-inline-alloca-promotion.ll

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,8 @@ define void @ham() #1 {
3535
; CHECK-NEXT: [[SNORK_EXIT:.*:]]
3636
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr inttoptr (i64 48 to ptr), align 16
3737
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[TMP0]], 0
38-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i64 0)
39-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP1]], <vscale x 16 x float> [[TMP2]], <vscale x 16 x float> undef
40-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[SPEC_SELECT]], i64 0)
38+
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> undef, i64 0)
39+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP2]]
4140
; CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP3]])
4241
; CHECK-NEXT: ret void
4342
;

0 commit comments

Comments
 (0)