Skip to content
Merged
42 changes: 29 additions & 13 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6466,6 +6466,9 @@ class SwitchReplacement {
/// Return the default value of the switch.
Constant *getDefaultValue();

/// Return true if the replacement is a lookup table.
bool isLookupTable();

private:
// Depending on the switch, there are different alternatives.
enum {
Expand Down Expand Up @@ -6753,6 +6756,8 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,

Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }

bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }

static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
// 40% is the default density for building a jump table in optsize/minsize
// mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
Expand Down Expand Up @@ -6918,16 +6923,12 @@ static void reuseTableCompare(
/// lookup tables.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
DomTreeUpdater *DTU, const DataLayout &DL,
const TargetTransformInfo &TTI) {
const TargetTransformInfo &TTI,
bool ConvertSwitchToLookupTable) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");

BasicBlock *BB = SI->getParent();
Function *Fn = BB->getParent();
// Only build lookup table when we have a target that supports it or the
// attribute is not set.
if (!TTI.shouldBuildLookupTables() ||
(Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
return false;

// FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that.
Expand Down Expand Up @@ -7086,6 +7087,26 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
PhiToReplacementMap.insert({PHI, Replacement});
}

bool AnyLookupTables = any_of(
PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });

// A few conditions prevent the generation of lookup tables:
// 1. Not setting the ConvertSwitchToLookupTable option
// This option prevents the LUT creation until a later stage in the
// pipeline, because it would otherwise result in some
// difficult-to-analyze code and make pruning branches much harder.
// This is a problem if the switch expression itself can be restricted
// by inlining or CVP.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the llvm-opt-benchmark results, we should also guard BitMapKind behind ConvertSwitchToLookupTable. Similar to lookup tables, this representation may make further optimization harder.

(I'd also be okay with dropping the ConvertSwitchToLookupTable change from this PR to not block the rest of the changes on flushing out all the phase ordering issues.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for looking into the benchmark and reviewing. I decided to guard the bitmap creation with ConvertSwitchToLookup for now instead of completely removing the ConvertSwitchToLookup change. Hopefully the new benchmark results will show no other regressions, so the other kinds of transformations can still be applied earlier without issues, otherwise I will remove it for now.

// 2. The target does not support lookup tables.
// 3. The "no-jump-tables" function attribute is set.
// However, these objections do not apply to other switch replacements, like
// the bitmap, so we only stop here if any of these conditions are met and we
// want to create a LUT. Otherwise, continue with the switch replacement.
if (AnyLookupTables &&
(!ConvertSwitchToLookupTable || !TTI.shouldBuildLookupTables() ||
Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
return false;

Builder.SetInsertPoint(SI);
// TableIndex is the switch condition - TableIndexOffset if we don't
// use the condition directly
Expand Down Expand Up @@ -7727,13 +7748,8 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
return requestResimplify();

// The conversion from switch to lookup tables results in difficult-to-analyze
// code and makes pruning branches much harder. This is a problem if the
// switch expression itself can still be restricted as a result of inlining or
// CVP. Therefore, only apply this transformation during late stages of the
// optimisation pipeline.
if (Options.ConvertSwitchToLookupTable &&
simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
Options.ConvertSwitchToLookupTable))
return requestResimplify();

if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ define internal i32 @table_switch(i32 %x) "branch-target-enforcement" {
; CHECK-NEXT: cmp r1, #3
; CHECK-NEXT: bhi .LBB0_6
; CHECK-NEXT: @ %bb.1: @ %entry
; CHECK-NEXT: movs r0, #3
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: tbb [pc, r1]
; CHECK-NEXT: @ %bb.2:
Expand All @@ -22,7 +23,7 @@ define internal i32 @table_switch(i32 %x) "branch-target-enforcement" {
; CHECK-NEXT: movs r0, #2
; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_4: @ %bb3
; CHECK-NEXT: movs r0, #3
; CHECK-NEXT: movs r0, #1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .LBB0_5: @ %bb4
; CHECK-NEXT: movs r0, #4
Expand Down Expand Up @@ -51,7 +52,7 @@ sw.epilog:
br label %return

return:
%ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ]
%ret = phi i32 [ 0, %sw.epilog ], [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 4, %bb4 ]
ret i32 %ret
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/PhaseOrdering/X86/merge-functions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ bb3: ; preds = %bb1, %bb2

define i1 @test2(i32 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[TMP2:%.*]] = tail call noundef i1 @test1(i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i1 @test1(i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: ret i1 [[TMP2]]
;
entry:
Expand Down
Loading
Loading