Skip to content
5 changes: 2 additions & 3 deletions llvm/include/llvm/CodeGen/AsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -893,9 +893,8 @@ class AsmPrinter : public MachineFunctionPass {
// Internal Implementation Details
//===------------------------------------------------------------------===//

void emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
ArrayRef<unsigned> JumpTableIndices,
bool JTInDiffSection);
virtual void emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
ArrayRef<unsigned> JumpTableIndices);
void emitJumpTableEntry(const MachineJumpTableInfo &MJTI,
const MachineBasicBlock *MBB, unsigned uid) const;

Expand Down
46 changes: 21 additions & 25 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2855,22 +2855,12 @@ void AsmPrinter::emitConstantPool() {
void AsmPrinter::emitJumpTableInfo() {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;

const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;

// Pick the directive to use to print the jump table entries, and switch to
// the appropriate section.
const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64,
F);

if (!TM.Options.EnableStaticDataPartitioning) {
emitJumpTableImpl(*MJTI, llvm::to_vector(llvm::seq<unsigned>(JT.size())),
JTInDiffSection);
emitJumpTableImpl(*MJTI, llvm::to_vector(llvm::seq<unsigned>(JT.size())));
return;
}

Expand All @@ -2886,33 +2876,39 @@ void AsmPrinter::emitJumpTableInfo() {
}
}

emitJumpTableImpl(*MJTI, HotJumpTableIndices, JTInDiffSection);
emitJumpTableImpl(*MJTI, ColdJumpTableIndices, JTInDiffSection);
emitJumpTableImpl(*MJTI, HotJumpTableIndices);
emitJumpTableImpl(*MJTI, ColdJumpTableIndices);
}

void AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
ArrayRef<unsigned> JumpTableIndices,
bool JTInDiffSection) {
if (JumpTableIndices.empty())
ArrayRef<unsigned> JumpTableIndices) {
if (MJTI.getEntryKind() == MachineJumpTableInfo::EK_Inline ||
JumpTableIndices.empty())
return;

const TargetLoweringObjectFile &TLOF = getObjFileLowering();
const Function &F = MF->getFunction();
const std::vector<MachineJumpTableEntry> &JT = MJTI.getJumpTables();
MCSection *JumpTableSection = nullptr;
if (TM.Options.EnableStaticDataPartitioning) {
JumpTableSection =
TLOF.getSectionForJumpTable(F, TM, &JT[JumpTableIndices.front()]);
} else {
JumpTableSection = TLOF.getSectionForJumpTable(F, TM);
}

const DataLayout &DL = MF->getDataLayout();
// Pick the directive to use to print the jump table entries, and switch to
// the appropriate section.
const bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
MJTI.getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
MJTI.getEntryKind() == MachineJumpTableInfo::EK_LabelDifference64,
F);
if (JTInDiffSection) {
if (TM.Options.EnableStaticDataPartitioning) {
JumpTableSection =
TLOF.getSectionForJumpTable(F, TM, &JT[JumpTableIndices.front()]);
} else {
JumpTableSection = TLOF.getSectionForJumpTable(F, TM);
}
OutStreamer->switchSection(JumpTableSection);
}

emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout())));
const DataLayout &DL = MF->getDataLayout();
emitAlignment(Align(MJTI.getEntryAlignment(DL)));

// Jump tables in code sections are marked with a data_region directive
// where that's supported.
Expand Down
28 changes: 18 additions & 10 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ class AArch64AsmPrinter : public AsmPrinter {
const MCExpr *lowerBlockAddressConstant(const BlockAddress &BA) override;

void emitStartOfAsmFile(Module &M) override;
void emitJumpTableInfo() override;
void emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
ArrayRef<unsigned> JumpTableIndices) override;
std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
codeview::JumpTableEntrySize>
getCodeViewJumpTableInfo(int JTI, const MachineInstr *BranchInstr,
Expand Down Expand Up @@ -1273,19 +1274,26 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps - 2, OS);
}

void AArch64AsmPrinter::emitJumpTableInfo() {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;

const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;

void AArch64AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
ArrayRef<unsigned> JumpTableIndices) {
// Fast return if there is nothing to emit to avoid creating empty sections.
if (JumpTableIndices.empty())
return;
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
const auto &F = MF->getFunction();
ArrayRef<MachineJumpTableEntry> JT = MJTI.getJumpTables();

MCSection *ReadOnlySec = nullptr;
if (TM.Options.EnableStaticDataPartitioning) {
ReadOnlySec =
TLOF.getSectionForJumpTable(F, TM, &JT[JumpTableIndices.front()]);
} else {
ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
}
OutStreamer->switchSection(ReadOnlySec);

auto AFI = MF->getInfo<AArch64FunctionInfo>();
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
for (unsigned JTI : JumpTableIndices) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;

// If this jump table was deleted, ignore it.
Expand Down
249 changes: 249 additions & 0 deletions llvm/test/CodeGen/AArch64/jump-table-partition.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
; The llc commands override two options
; - 'aarch64-enable-atomic-cfg-tidy' to false to turn off simplifycfg pass,
; which can simplify away switch instructions before isel lowers switch instructions.
; - 'aarch64-min-jump-table-entries' so 'switch' needs fewer cases to generate
; a jump table.

; The static-data-splitter pass doesn't run.
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -function-sections=true \
; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \
; RUN: -unique-section-names=true %s -o - 2>&1 | FileCheck %s --check-prefixes=DEFAULT

; DEFAULT: .section .rodata.hot.foo,"a",@progbits
; DEFAULT: .LJTI0_0:
; DEFAULT: .LJTI0_1:
; DEFAULT: .LJTI0_2:
; DEFAULT: .LJTI0_3:
; DEFAULT: .section .rodata.func_without_profile,"a",@progbits
; DEFAULT: .LJTI1_0:
; DEFAULT: .section .rodata.bar_prefix.bar,"a",@progbits
; DEFAULT: .LJTI2_0

; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \
; RUN: -partition-static-data-sections=true -function-sections=true \
; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \
; RUN: -unique-section-names=false %s -o - 2>&1 | FileCheck %s --check-prefixes=NUM,JT

; Section names will optionally have `.<func>` if -function-sections is enabled.
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \
; RUN: -partition-static-data-sections=true -function-sections=true \
; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=FUNC,JT

; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \
; RUN: -partition-static-data-sections=true -function-sections=false \
; RUN: -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=FUNCLESS,JT

; In function @foo, the jump tables for the 2 switch instructions to jt0.* and
; jt2.* are placed in hot-prefixed sections, and the jump tables for the 2
; switch instructions to jt1.* and jt3.* are placed in cold-prefixed
; (.unlikely) sections.
; NUM: .section .rodata.hot.,"a",@progbits,unique,2
; FUNC: .section .rodata.hot.foo,"a",@progbits
; FUNCLESS: .section .rodata.hot.,"a",@progbits
; JT: .LJTI0_0:
; JT: .LJTI0_2:
; NUM: .section .rodata.unlikely.,"a",@progbits,unique,3
; FUNC: .section .rodata.unlikely.foo,
; FUNCLESS: .section .rodata.unlikely.,"a",@progbits
; JT: .LJTI0_1:
; JT: .LJTI0_3:

; func_without_profile doesn't have profiles, so its jumptable doesn't have
; hotness-based prefix.
; NUM: .section .rodata,"a",@progbits,unique,5
; FUNC: .section .rodata.func_without_profile,"a",@progbits
; FUNCLESS: .section .rodata,"a",@progbits
; JT: .LJTI1_0:

; @bar doesn't have profile information and it has a section prefix.
; Tests that its jump tables are placed in sections with function prefixes.
; NUM: .section .rodata.bar_prefix.,"a",@progbits,unique,7
; FUNC: .section .rodata.bar_prefix.bar
; FUNCLESS: .section .rodata.bar_prefix.,"a"
; JT: .LJTI2_0

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

@str.9 = private constant [7 x i8] c".str.9\00"
@str.10 = private constant [8 x i8] c".str.10\00"
@str.11 = private constant [8 x i8] c".str.11\00"

@case2 = private constant [7 x i8] c"case 2\00"
@case1 = private constant [7 x i8] c"case 1\00"
@default = private constant [8 x i8] c"default\00"
@jt3 = private constant [4 x i8] c"jt3\00"

; jt0 and jt2 are hot. jt1 and jt3 are cold.
;
; @foo carries an entry count (!prof !13) and contains four switch
; instructions, each with a default destination plus cases 1 and 2. Hotness is
; driven by the branch weights on the two conditional branches below: the
; switches reached through the heavily weighted successors (jt0, jt2) are the
; hot ones, the switches reached through the weight-1 successors (jt1, jt3) are
; the cold ones.
define i32 @foo(i32 %num) !prof !13 {
entry:
; Despite its name, %mod3 is a signed division by 3 (sdiv), not a remainder.
; It selects the jt0 switch and is also the function's return value.
%mod3 = sdiv i32 %num, 3
switch i32 %mod3, label %jt0.default [
i32 1, label %jt0.bb1
i32 2, label %jt0.bb2
], !prof !14

jt0.bb1:
call i32 @puts(ptr @case1)
br label %jt0.epilog

jt0.bb2:
call i32 @puts(ptr @case2)
br label %jt0.epilog

jt0.default:
call i32 @puts(ptr @default)
br label %jt0.epilog

jt0.epilog:
; !17 weights the two successors 99999 to 1 in successor order, so %hot is the
; likely path and %cold is the rarely taken one.
%zero = icmp eq i32 %num, 0
br i1 %zero, label %hot, label %cold, !prof !17

hot:
; Likely path: switch on the result of @transform (the jt2 switch).
%c2 = call i32 @transform(i32 %num)
switch i32 %c2, label %jt2.default [
i32 1, label %jt2.bb1
i32 2, label %jt2.bb2
], !prof !14

; NOTE(review): jt2.bb1 and jt2.bb2 branch to %jt1.epilog, not %jt2.epilog, so
; only jt2.default reaches jt2.epilog. This looks like it may be unintentional;
; confirm before changing it, since it alters the CFG this test is checked
; against.
jt2.bb1:
call i32 @puts(ptr @case1)
br label %jt1.epilog

jt2.bb2:
call i32 @puts(ptr @case2)
br label %jt1.epilog

jt2.default:
call i32 @puts(ptr @default)
br label %jt2.epilog

jt2.epilog:
; !18 weights the two successors 99998 to 1 in successor order, so %return is
; the likely path and %jt3.prologue (which holds the jt3 switch) is rarely
; taken.
%c2cmp = icmp ne i32 %c2, 0
br i1 %c2cmp, label %return, label %jt3.prologue, !prof !18

cold:
; Rarely taken path: switch on the result of @compute (the jt1 switch).
%c1 = call i32 @compute(i32 %num)
switch i32 %c1, label %jt1.default [
i32 1, label %jt1.bb1
i32 2, label %jt1.bb2
], !prof !14

jt1.bb1:
call i32 @puts(ptr @case1)
br label %jt1.epilog

jt1.bb2:
call i32 @puts(ptr @case2)
br label %jt1.epilog

jt1.default:
call i32 @puts(ptr @default)
br label %jt1.epilog

jt1.epilog:
br label %return

jt3.prologue:
; Rarely taken path: switch on the result of @cleanup (the jt3 switch).
%c3 = call i32 @cleanup(i32 %num)
switch i32 %c3, label %jt3.default [
i32 1, label %jt3.bb1
i32 2, label %jt3.bb2
], !prof !14

jt3.bb1:
call i32 @puts(ptr @case1)
br label %jt3.epilog

jt3.bb2:
call i32 @puts(ptr @case2)
br label %jt3.epilog

jt3.default:
call i32 @puts(ptr @default)
br label %jt3.epilog

jt3.epilog:
call i32 @puts(ptr @jt3)
br label %return

return:
ret i32 %mod3
}

; A function with no !prof and no !section_prefix metadata. It contains a single
; switch on %num (default plus cases 1 and 2); every destination prints one
; string via @puts and falls through to sw.epilog. The check lines above expect
; its jump table in a .rodata section without a hotness prefix.
define void @func_without_profile(i32 %num) {
entry:
switch i32 %num, label %sw.default [
i32 1, label %sw.bb
i32 2, label %sw.bb1
]

sw.bb:
call i32 @puts(ptr @str.10)
br label %sw.epilog

sw.bb1:
call i32 @puts(ptr @str.9)
br label %sw.epilog

sw.default:
call i32 @puts(ptr @str.11)
br label %sw.epilog

sw.epilog:
ret void
}

; Same body as @func_without_profile (one switch on %num with a default plus
; cases 1 and 2), but the function carries !section_prefix !20, i.e. the
; "bar_prefix" function section prefix, and no !prof metadata. The check lines
; above expect its jump table in a .rodata.bar_prefix section.
define void @bar(i32 %num) !section_prefix !20 {
entry:
switch i32 %num, label %sw.default [
i32 1, label %sw.bb
i32 2, label %sw.bb1
]

sw.bb:
call i32 @puts(ptr @str.10)
br label %sw.epilog

sw.bb1:
call i32 @puts(ptr @str.9)
br label %sw.epilog

sw.default:
call i32 @puts(ptr @str.11)
br label %sw.epilog

sw.epilog:
ret void
}

; External functions. @puts is called from every switch destination; @compute,
; @transform and @cleanup produce the switch operands in @foo. @printf is
; declared but not called anywhere in this file.
declare i32 @puts(ptr)
declare i32 @printf(ptr, ...)
declare i32 @compute(i32)
declare i32 @transform(i32)
declare i32 @cleanup(i32)

; Module-level profile summary (!0 - !12) followed by the per-function and
; per-branch profile nodes used above.
!llvm.module.flags = !{!0}

!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 230002}
!4 = !{!"MaxCount", i64 100000}
!5 = !{!"MaxInternalCount", i64 50000}
!6 = !{!"MaxFunctionCount", i64 100000}
!7 = !{!"NumCounts", i64 14}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12}
!11 = !{i32 990000, i64 10000, i32 7}
!12 = !{i32 999999, i64 1, i32 9}
; !13 is the entry count attached to @foo.
!13 = !{!"function_entry_count", i64 100000}
; !14 is attached to all four switch instructions in @foo.
!14 = !{!"branch_weights", i32 60000, i32 20000, i32 20000}
; !15 and !16 are not referenced by anything in this file.
!15 = !{!"function_entry_count", i64 1}
!16 = !{!"branch_weights", i32 1, i32 0, i32 0, i32 0, i32 0, i32 0}
; !17 is attached to the hot/cold branch in jt0.epilog of @foo.
!17 = !{!"branch_weights", i32 99999, i32 1}
; !18 is attached to the return/jt3.prologue branch in jt2.epilog of @foo.
!18 = !{!"branch_weights", i32 99998, i32 1}
; !19 is not referenced by anything in this file.
!19 = !{!"branch_weights", i32 97000, i32 1000, i32 1000, i32 1000}
; !20 is the section prefix attached to @bar.
!20 = !{!"function_section_prefix", !"bar_prefix"}