Skip to content

Commit 527af30

Browse files
Add support for dynamic libraries in CLANG_BOLT (#127020)
1 parent a9f02a4 commit 527af30

File tree

3 files changed

+109
-47
lines changed

3 files changed

+109
-47
lines changed

clang/cmake/caches/BOLT.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
22
set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
33
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
4+
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
45

56
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
67
set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")

clang/tools/driver/CMakeLists.txt

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,28 @@ if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
168168
)
169169
set(LIT_COMMAND "${lit_base_dir}/${lit_file_name}")
170170

171+
set(CLANG_BOLT_INPUTS $<TARGET_FILE:clang>)
172+
set(CLANG_INSTRUMENTED_OUTPUTS ${CLANG_INSTRUMENTED})
173+
174+
# Add in dynamically linked libraries, if needs be. Currently only supported
175+
# on Linux because it relies on LD_PRELOAD for instrumentation.
176+
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
177+
if (CLANG_LINK_CLANG_DYLIB)
178+
set(CLANG_CPP_BOLT_INSTRUMENTED "clang-cxx-bolt.inst" CACHE STRING
179+
"Name of BOLT-instrumented Clang library")
180+
set(CLANG_CPP_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CLANG_CPP_BOLT_INSTRUMENTED})
181+
list(APPEND CLANG_BOLT_INPUTS $<TARGET_FILE:clang-cpp>)
182+
list(APPEND CLANG_INSTRUMENTED_OUTPUTS ${CLANG_CPP_INSTRUMENTED})
183+
endif()
184+
if (LLVM_LINK_LLVM_DYLIB)
185+
set(LLVM_BOLT_INSTRUMENTED "LLVM-bolt.inst" CACHE STRING
186+
"Name of BOLT-instrumented LLVM library")
187+
set(LLVM_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${LLVM_BOLT_INSTRUMENTED})
188+
list(APPEND CLANG_BOLT_INPUTS $<TARGET_FILE:LLVM>)
189+
list(APPEND CLANG_INSTRUMENTED_OUTPUTS ${LLVM_INSTRUMENTED})
190+
endif()
191+
endif()
192+
171193
# This POST_BUILD command is executed unconditionally even if the clang target
172194
# is already built. We need to wrap the whole bolt optimization process in
173195
# a single python wrapper, so that we can first check if the binary has
@@ -176,15 +198,15 @@ if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
176198
TARGET clang POST_BUILD
177199
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/perf-training/perf-helper.py
178200
bolt-optimize
179-
--method ${CLANG_BOLT}
180-
--input $<TARGET_FILE:clang>
181-
--instrumented-output ${CLANG_INSTRUMENTED}
182-
--fdata ${BOLT_FDATA}
183-
--perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR}
184-
--readelf $<TARGET_FILE:llvm-readobj>
185-
--bolt $<TARGET_FILE:llvm-bolt>
186-
--lit "${LIT_COMMAND}"
187-
--merge-fdata $<TARGET_FILE:merge-fdata>
201+
--method ${CLANG_BOLT}
202+
--input "${CLANG_BOLT_INPUTS}"
203+
--instrumented-output "${CLANG_INSTRUMENTED_OUTPUTS}"
204+
--fdata ${BOLT_FDATA}
205+
--perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR}
206+
--readelf $<TARGET_FILE:llvm-readobj>
207+
--bolt $<TARGET_FILE:llvm-bolt>
208+
--lit "${LIT_COMMAND}"
209+
--merge-fdata $<TARGET_FILE:merge-fdata>
188210
COMMENT "Optimizing Clang with BOLT"
189211
USES_TERMINAL
190212
VERBATIM

clang/utils/perf-training/perf-helper.py

Lines changed: 77 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,23 @@ def genOrderFile(args):
560560
return 0
561561

562562

563+
def filter_bolt_optimized(inputs, instrumented_outputs, readelf=None):
    """Drop the binaries that BOLT has already processed.

    Each entry of ``inputs`` is inspected with ``<readelf> -WS`` and is kept
    only when its section listing does not mention ``.bolt.org.text``.
    ``inputs`` and ``instrumented_outputs`` are walked in lockstep, so the
    instrumented-output path paired with a skipped input is dropped too.

    Args:
        inputs: paths of the binaries / shared libraries to examine.
        instrumented_outputs: paths paired index-by-index with ``inputs``.
        readelf: path of the readelf-compatible tool to run. Callers should
            pass this explicitly. When omitted, the function falls back to
            a module-level ``opts.readelf`` (the original lookup), which
            raises ``NameError`` if no such global exists.

    Returns:
        A ``(inputs, instrumented_outputs)`` tuple of two new lists holding
        only the pairs that still need processing, in their original order.

    Raises:
        subprocess.CalledProcessError: if the readelf tool exits non-zero.
    """
    new_inputs = []
    new_instrumented_outputs = []
    for binary, instrumented_output in zip(inputs, instrumented_outputs):
        # Resolve the tool lazily so that an empty input list never touches
        # the legacy global fallback.
        tool = readelf if readelf is not None else opts.readelf
        sections = subprocess.check_output(
            [tool, "-WS", binary], universal_newlines=True
        )

        # This binary has already been bolt-optimized, so skip further processing.
        if re.search(r"\.bolt\.org\.text", sections):
            print(f"Skipping {binary}, it's already instrumented")
            continue

        new_inputs.append(binary)
        new_instrumented_outputs.append(instrumented_output)
    return new_inputs, new_instrumented_outputs
578+
579+
563580
def bolt_optimize(args):
564581
parser = argparse.ArgumentParser("%prog [options] ")
565582
parser.add_argument("--method", choices=["INSTRUMENT", "PERF", "LBR"])
@@ -574,47 +591,67 @@ def bolt_optimize(args):
574591

575592
opts = parser.parse_args(args)
576593

577-
output = subprocess.check_output(
578-
[opts.readelf, "-WS", opts.input], universal_newlines=True
579-
)
594+
inputs = opts.input.split(";")
595+
instrumented_outputs = opts.instrumented_output.split(";")
596+
assert len(inputs) == len(
597+
instrumented_outputs
598+
), "inconsistent --input / --instrumented-output arguments"
580599

581-
# This binary has already been bolt-optimized, so skip further processing.
582-
if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE):
600+
inputs, instrumented_outputs = filter_bolt_optimized(inputs, instrumented_outputs)
601+
if not inputs:
583602
return 0
584603

604+
environ = os.environ.copy()
585605
if opts.method == "INSTRUMENT":
586-
process = subprocess.run(
587-
[
606+
preloads = []
607+
for input, instrumented_output in zip(inputs, instrumented_outputs):
608+
args = [
588609
opts.bolt,
589-
opts.input,
610+
input,
590611
"-o",
591-
opts.instrumented_output,
612+
instrumented_output,
592613
"-instrument",
593614
"--instrumentation-file-append-pid",
594615
f"--instrumentation-file={opts.fdata}",
595-
],
596-
stdout=subprocess.PIPE,
597-
stderr=subprocess.STDOUT,
598-
text=True,
599-
)
616+
]
617+
print("Running: " + " ".join(args))
618+
process = subprocess.run(
619+
args,
620+
stdout=subprocess.PIPE,
621+
stderr=subprocess.STDOUT,
622+
text=True,
623+
)
600624

601-
print(process.args)
602-
for line in process.stdout:
603-
sys.stdout.write(line)
604-
process.check_returncode()
625+
for line in process.stdout:
626+
sys.stdout.write(line)
627+
process.check_returncode()
605628

629+
output = subprocess.check_output(
630+
[opts.readelf, "--file-header", input], universal_newlines=True
631+
)
632+
if re.search(r"Type:\s*((Shared)|(DYN))", output):
633+
# force using the instrumented version
634+
preloads.append(instrumented_output)
635+
636+
if preloads:
637+
print("Patching execution environment for dynamic library")
638+
environ["LD_PRELOAD"] = os.pathsep.join(preloads)
639+
640+
args = [
641+
sys.executable,
642+
opts.lit,
643+
"-v",
644+
os.path.join(opts.perf_training_binary_dir, f"bolt-fdata"),
645+
]
646+
print("Running: " + " ".join(args))
606647
process = subprocess.run(
607-
[
608-
sys.executable,
609-
opts.lit,
610-
os.path.join(opts.perf_training_binary_dir, "bolt-fdata"),
611-
],
648+
args,
612649
stdout=subprocess.PIPE,
613650
stderr=subprocess.STDOUT,
614651
text=True,
652+
env=environ,
615653
)
616654

617-
print(process.args)
618655
for line in process.stdout:
619656
sys.stdout.write(line)
620657
process.check_returncode()
@@ -624,14 +661,14 @@ def bolt_optimize(args):
624661

625662
merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir])
626663

627-
shutil.copy(opts.input, f"{opts.input}-prebolt")
664+
for input in inputs:
665+
shutil.copy(input, f"{input}-prebolt")
628666

629-
process = subprocess.run(
630-
[
667+
args = [
631668
opts.bolt,
632-
f"{opts.input}-prebolt",
669+
f"{input}-prebolt",
633670
"-o",
634-
opts.input,
671+
input,
635672
"-data",
636673
opts.fdata,
637674
"-reorder-blocks=ext-tsp",
@@ -643,16 +680,18 @@ def bolt_optimize(args):
643680
"-use-gnu-stack",
644681
"-update-debug-sections",
645682
"-nl" if opts.method == "PERF" else "",
646-
],
647-
stdout=subprocess.PIPE,
648-
stderr=subprocess.STDOUT,
649-
text=True,
650-
)
683+
]
684+
print("Running: " + " ".join(args))
685+
process = subprocess.run(
686+
args,
687+
stdout=subprocess.PIPE,
688+
stderr=subprocess.STDOUT,
689+
text=True,
690+
)
651691

652-
print(process.args)
653-
for line in process.stdout:
654-
sys.stdout.write(line)
655-
process.check_returncode()
692+
for line in process.stdout:
693+
sys.stdout.write(line)
694+
process.check_returncode()
656695

657696

658697
commands = {

0 commit comments

Comments
 (0)