Skip to content

Commit 8b473aa

Browse files
committed
[Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED
Build on the Clang-BOLT infrastructure to collect sample profiles for CSSPGO. Add clang/cmake/caches/CSSPGO.cmake to automate the CSSPGO Clang build.

Differential Revision: https://reviews.llvm.org/D155419
1 parent f122484 commit 8b473aa

File tree

8 files changed: 120 additions (+120) and 16 deletions (-16).

clang/CMakeLists.txt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,11 +754,22 @@ if (CLANG_ENABLE_BOOTSTRAP)
754754
if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
755755
add_dependencies(clang-bootstrap-deps llvm-profdata)
756756
set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata)
757+
string(TOUPPER "${BOOTSTRAP_LLVM_BUILD_INSTRUMENTED}" BOOTSTRAP_LLVM_BUILD_INSTRUMENTED)
758+
if (BOOTSTRAP_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
759+
add_dependencies(clang-bootstrap-deps llvm-profgen)
760+
list(APPEND PGO_OPT -DLLVM_PROFGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profgen)
761+
endif()
757762
endif()
758763

759764
if(LLVM_BUILD_INSTRUMENTED)
760-
add_dependencies(clang-bootstrap-deps generate-profdata)
761-
set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
765+
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED)
766+
if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
767+
add_dependencies(clang-bootstrap-deps generate-sprofdata)
768+
set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata)
769+
else()
770+
add_dependencies(clang-bootstrap-deps generate-profdata)
771+
set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata)
772+
endif()
762773
# Use the current tools for LTO instead of the instrumented ones
763774
list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH
764775
CMAKE_CXX_COMPILER
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
set(BOLT_PGO_CMAKE_CACHE "CSSPGO" CACHE STRING "")
2+
set(BOOTSTRAP_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
3+
include(${CMAKE_CURRENT_LIST_DIR}/BOLT-PGO.cmake)

clang/cmake/caches/BOLT-PGO.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "")
12
set(LLVM_ENABLE_PROJECTS "bolt;clang;lld" CACHE STRING "")
23

34
set(CLANG_BOOTSTRAP_TARGETS
@@ -14,4 +15,4 @@ set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS
1415
set(PGO_BUILD_CONFIGURATION
1516
${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake
1617
CACHE STRING "")
17-
include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)
18+
include(${CMAKE_CURRENT_LIST_DIR}/${BOLT_PGO_CMAKE_CACHE}.cmake)

clang/cmake/caches/CSSPGO.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED "CSSPGO" CACHE STRING "")
2+
include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake)

clang/utils/perf-training/CMakeLists.txt

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if(LLVM_BUILD_INSTRUMENTED)
2121
add_lit_testsuite(generate-profraw "Generating clang PGO data"
2222
${CMAKE_CURRENT_BINARY_DIR}/pgo-data/
2323
EXCLUDE_FROM_CHECK_ALL
24-
DEPENDS clear-profraw
24+
DEPENDS clear-profraw clang
2525
)
2626

2727
add_custom_target(clear-profraw
@@ -55,6 +55,32 @@ if(LLVM_BUILD_INSTRUMENTED)
5555
USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw)
5656
add_dependencies(generate-profdata generate-profraw-external)
5757
endif()
58+
59+
if(NOT LLVM_PROFGEN)
60+
find_program(LLVM_PROFGEN llvm-profgen)
61+
endif()
62+
63+
if(NOT LLVM_PROFGEN)
64+
message(STATUS "To enable converting CSSPGO samples LLVM_PROFGEN has to point to llvm-profgen")
65+
elseif(NOT CLANG_PGO_TRAINING_DATA_SOURCE_DIR)
66+
message(STATUS "CLANG_PGO_TRAINING_DATA_SOURCE_DIR must be set to collect CSSPGO samples")
67+
else()
68+
set(PERF_HELPER "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py)
69+
set(CLANG_SPROFDATA ${CMAKE_CURRENT_BINARY_DIR}/clang.sprofdata)
70+
add_custom_command(
71+
OUTPUT ${CLANG_SPROFDATA}
72+
# Execute generate-profraw-external under perf
73+
COMMAND ${PERF_HELPER} perf --csspgo -- ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target generate-profraw-external
74+
# Convert perf profile into profraw
75+
COMMAND ${PERF_HELPER} perf2prof ${LLVM_PROFGEN} $<TARGET_FILE:clang> ${CMAKE_CURRENT_BINARY_DIR}
76+
# Merge profdata
77+
COMMAND ${PERF_HELPER} merge --sample ${LLVM_PROFDATA} ${CLANG_SPROFDATA} ${CMAKE_CURRENT_BINARY_DIR}
78+
DEPENDS clang ${CLANG_PGO_TRAINING_DEPS} clear-perf-data generate-profraw-external-clean
79+
VERBATIM
80+
USES_TERMINAL
81+
)
82+
add_custom_target(generate-sprofdata DEPENDS ${CLANG_SPROFDATA})
83+
endif()
5884
endif()
5985
endif()
6086

clang/utils/perf-training/perf-helper.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,22 @@ def clean(args):
4545

4646

4747
def merge(args):
48-
if len(args) < 3:
49-
print(
50-
"Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
51-
+ "\tMerges all profraw files from path into output."
52-
)
53-
return 1
54-
cmd = [args[0], "merge", "-o", args[1]]
55-
for path in args[2:]:
48+
parser = argparse.ArgumentParser(
49+
prog="perf-helper merge",
50+
description="Merges all profraw files from path(s) into output",
51+
)
52+
parser.add_argument("profdata", help="Path to llvm-profdata tool")
53+
parser.add_argument("output", help="Output filename")
54+
parser.add_argument(
55+
"paths", nargs="+", help="Folder(s) containing input profraw files"
56+
)
57+
parser.add_argument("--sample", action="store_true", help="Sample profile")
58+
opts = parser.parse_args(args)
59+
60+
cmd = [opts.profdata, "merge", "-o", opts.output]
61+
if opts.sample:
62+
cmd += ["--sample"]
63+
for path in opts.paths:
5664
cmd.extend(findFilesWithExtension(path, "profraw"))
5765
subprocess.check_call(cmd)
5866
return 0
@@ -73,25 +81,30 @@ def merge_fdata(args):
7381

7482
def perf(args):
7583
parser = argparse.ArgumentParser(
76-
prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
84+
prog="perf-helper perf",
85+
description="perf wrapper for BOLT/CSSPGO profile collection",
7786
)
7887
parser.add_argument(
7988
"--lbr", action="store_true", help="Use perf with branch stacks"
8089
)
90+
parser.add_argument("--csspgo", action="store_true", help="Enable CSSPGO flags")
8191
parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")
8292

8393
opts = parser.parse_args(args)
8494
cmd = opts.cmd[1:]
8595

96+
event = "br_inst_retired.near_taken:uppp" if opts.csspgo else "cycles:u"
8697
perf_args = [
8798
"perf",
8899
"record",
89-
"--event=cycles:u",
100+
f"--event={event}",
90101
"--freq=max",
91102
"--output=%d.perf.data" % os.getpid(),
92103
]
93-
if opts.lbr:
104+
if opts.lbr or opts.csspgo:
94105
perf_args += ["--branch-filter=any,u"]
106+
if opts.csspgo:
107+
perf_args += ["-g", "--call-graph=fp"]
95108
perf_args.extend(cmd)
96109

97110
start_time = time.time()
@@ -127,6 +140,26 @@ def perf2bolt(args):
127140
return 0
128141

129142

143+
def perf2prof(args):
144+
parser = argparse.ArgumentParser(
145+
prog="perf-helper perf2prof",
146+
description="perf to CSSPGO prof conversion wrapper",
147+
)
148+
parser.add_argument("profgen", help="Path to llvm-profgen binary")
149+
parser.add_argument("binary", help="Input binary")
150+
parser.add_argument("paths", nargs="+", help="Path containing perf.data files")
151+
opts = parser.parse_args(args)
152+
153+
profgen_args = [opts.profgen, f"--binary={opts.binary}"]
154+
for path in opts.paths:
155+
for filename in findFilesWithExtension(path, "perf.data"):
156+
subprocess.check_call(
157+
profgen_args
158+
+ [f"--perfdata={filename}", f"--output={filename}.profraw"]
159+
)
160+
return 0
161+
162+
130163
def dtrace(args):
131164
parser = argparse.ArgumentParser(
132165
prog="perf-helper dtrace",
@@ -707,6 +740,7 @@ def bolt_optimize(args):
707740
"merge-fdata": merge_fdata,
708741
"perf": perf,
709742
"perf2bolt": perf2bolt,
743+
"perf2prof": perf2prof,
710744
}
711745

712746

llvm/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,9 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_defa
10111011
set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
10121012
"Profiling data file to use when compiling in order to improve runtime performance.")
10131013

1014+
set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH
1015+
"Sampling profiling data file to use when compiling in order to improve runtime performance.")
1016+
10141017
if(LLVM_INCLUDE_TESTS)
10151018
# All LLVM Python files should be compatible down to this minimum version.
10161019
set(LLVM_MINIMUM_PYTHON_VERSION 3.8)

llvm/cmake/modules/HandleLLVMOptions.cmake

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1184,7 +1184,7 @@ if(LLVM_ENABLE_EH AND NOT LLVM_ENABLE_RTTI)
11841184
message(FATAL_ERROR "Exception handling requires RTTI. You must set LLVM_ENABLE_RTTI to ON")
11851185
endif()
11861186

1187-
set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend")
1187+
set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO")
11881188
set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang")
11891189
mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE)
11901190
string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED)
@@ -1217,6 +1217,15 @@ if (LLVM_BUILD_INSTRUMENTED)
12171217
CMAKE_EXE_LINKER_FLAGS
12181218
CMAKE_SHARED_LINKER_FLAGS)
12191219
endif()
1220+
elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO")
1221+
append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
1222+
CMAKE_CXX_FLAGS
1223+
CMAKE_C_FLAGS)
1224+
if(NOT LINKER_IS_LLD_LINK)
1225+
append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling"
1226+
CMAKE_EXE_LINKER_FLAGS
1227+
CMAKE_SHARED_LINKER_FLAGS)
1228+
endif()
12201229
else()
12211230
append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\""
12221231
CMAKE_CXX_FLAGS
@@ -1269,6 +1278,21 @@ elseif(LLVM_PROFDATA_FILE)
12691278
message(WARNING "LLVM_PROFDATA_FILE specified, but ${LLVM_PROFDATA_FILE} not found")
12701279
endif()
12711280

1281+
if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE})
1282+
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
1283+
append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
1284+
CMAKE_CXX_FLAGS
1285+
CMAKE_C_FLAGS)
1286+
if(NOT LINKER_IS_LLD_LINK)
1287+
append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\""
1288+
CMAKE_EXE_LINKER_FLAGS
1289+
CMAKE_SHARED_LINKER_FLAGS)
1290+
endif()
1291+
else()
1292+
message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang")
1293+
endif()
1294+
endif()
1295+
12721296
option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
12731297
option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
12741298
mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)

0 commit comments

Comments
 (0)