-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Add support for dynamic libraries in CLANG_BOLT #127020
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -560,6 +560,23 @@ def genOrderFile(args): | |
| return 0 | ||
|
|
||
|
|
||
def filter_bolt_optimized(inputs, instrumented_outputs, readelf=None):
    """Drop the binaries whose section table already contains BOLT's marker.

    Each entry of ``inputs`` is inspected with ``<readelf> -WS``. A binary
    whose section listing mentions ``.bolt.org.text`` is reported and left
    out of the result, together with the entry at the same position in
    ``instrumented_outputs``.

    Args:
        inputs: Paths of the binaries to inspect.
        instrumented_outputs: Paths paired positionally with ``inputs``.
        readelf: Path or name of the readelf tool to run. Callers should
            pass this explicitly (e.g. ``opts.readelf``).

    Returns:
        A ``(inputs, instrumented_outputs)`` tuple of two lists holding only
        the pairs that still need processing, in their original order.

    Raises:
        subprocess.CalledProcessError: If readelf exits with a non-zero status.
        OSError: If the readelf executable cannot be started.
    """
    if readelf is None:
        # Earlier revisions read `opts.readelf` here, but `opts` is a local of
        # bolt_optimize, so the lookup only worked if a module-level `opts`
        # happened to exist. Honour such a global when present.
        # NOTE(review): the "llvm-readelf" fallback is an assumption about the
        # tool's usual name -- confirm, or always pass `readelf`.
        legacy_opts = globals().get("opts")
        readelf = getattr(legacy_opts, "readelf", None) or "llvm-readelf"

    new_inputs = []
    new_instrumented_outputs = []
    for binary, instrumented_output in zip(inputs, instrumented_outputs):
        sections = subprocess.check_output(
            [readelf, "-WS", binary], universal_newlines=True
        )

        # This binary has already been processed by BOLT, so skip it.
        if ".bolt.org.text" in sections:
            print(f"Skipping {binary}, it's already instrumented")
        else:
            new_inputs.append(binary)
            new_instrumented_outputs.append(instrumented_output)
    return new_inputs, new_instrumented_outputs
|
|
||
|
|
||
| def bolt_optimize(args): | ||
| parser = argparse.ArgumentParser("%prog [options] ") | ||
| parser.add_argument("--method", choices=["INSTRUMENT", "PERF", "LBR"]) | ||
|
|
@@ -574,47 +591,67 @@ def bolt_optimize(args): | |
|
|
||
| opts = parser.parse_args(args) | ||
|
|
||
| output = subprocess.check_output( | ||
| [opts.readelf, "-WS", opts.input], universal_newlines=True | ||
| ) | ||
| inputs = opts.input.split(";") | ||
| instrumented_outputs = opts.instrumented_output.split(";") | ||
| assert len(inputs) == len( | ||
| instrumented_outputs | ||
| ), "inconsistent --input / --instrumented-output arguments" | ||
|
|
||
| # This binary has already been bolt-optimized, so skip further processing. | ||
| if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE): | ||
| inputs, instrumented_outputs = filter_bolt_optimized(inputs, instrumented_outputs) | ||
| if not inputs: | ||
| return 0 | ||
|
|
||
| environ = os.environ.copy() | ||
| if opts.method == "INSTRUMENT": | ||
| process = subprocess.run( | ||
| [ | ||
| preloads = [] | ||
| for input, instrumented_output in zip(inputs, instrumented_outputs): | ||
| args = [ | ||
| opts.bolt, | ||
| opts.input, | ||
| input, | ||
| "-o", | ||
| opts.instrumented_output, | ||
| instrumented_output, | ||
| "-instrument", | ||
| "--instrumentation-file-append-pid", | ||
| f"--instrumentation-file={opts.fdata}", | ||
| ], | ||
| stdout=subprocess.PIPE, | ||
| stderr=subprocess.STDOUT, | ||
| text=True, | ||
| ) | ||
| ] | ||
| print("Running: " + " ".join(args)) | ||
| process = subprocess.run( | ||
| args, | ||
| stdout=subprocess.PIPE, | ||
| stderr=subprocess.STDOUT, | ||
| text=True, | ||
| ) | ||
|
|
||
| print(process.args) | ||
| for line in process.stdout: | ||
| sys.stdout.write(line) | ||
| process.check_returncode() | ||
| for line in process.stdout: | ||
| sys.stdout.write(line) | ||
| process.check_returncode() | ||
|
|
||
| output = subprocess.check_output( | ||
| [opts.readelf, "--file-header", input], universal_newlines=True | ||
| ) | ||
| if re.search(r"Type:\s*((Shared)|(DYN))", output): | ||
| # force using the instrumented version | ||
| preloads.append(instrumented_output) | ||
|
|
||
| if preloads: | ||
| print("Patching execution environment for dynamic library") | ||
| environ["LD_PRELOAD"] = os.pathsep.join(preloads) | ||
|
Comment on lines
+636
to
+638
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we need this?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The instrumented binary is still linked to the non-instrumented libraries. |
||
|
|
||
| args = [ | ||
| sys.executable, | ||
| opts.lit, | ||
| "-v", | ||
| os.path.join(opts.perf_training_binary_dir, f"bolt-fdata"), | ||
| ] | ||
| print("Running: " + " ".join(args)) | ||
| process = subprocess.run( | ||
| [ | ||
| sys.executable, | ||
| opts.lit, | ||
| os.path.join(opts.perf_training_binary_dir, "bolt-fdata"), | ||
| ], | ||
| args, | ||
| stdout=subprocess.PIPE, | ||
| stderr=subprocess.STDOUT, | ||
| text=True, | ||
| env=environ, | ||
| ) | ||
|
|
||
| print(process.args) | ||
| for line in process.stdout: | ||
| sys.stdout.write(line) | ||
| process.check_returncode() | ||
|
|
@@ -624,14 +661,14 @@ def bolt_optimize(args): | |
|
|
||
| merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir]) | ||
|
|
||
| shutil.copy(opts.input, f"{opts.input}-prebolt") | ||
| for input in inputs: | ||
| shutil.copy(input, f"{input}-prebolt") | ||
|
|
||
| process = subprocess.run( | ||
| [ | ||
| args = [ | ||
| opts.bolt, | ||
| f"{opts.input}-prebolt", | ||
| f"{input}-prebolt", | ||
| "-o", | ||
| opts.input, | ||
| input, | ||
| "-data", | ||
| opts.fdata, | ||
| "-reorder-blocks=ext-tsp", | ||
|
|
@@ -643,16 +680,18 @@ def bolt_optimize(args): | |
| "-use-gnu-stack", | ||
| "-update-debug-sections", | ||
| "-nl" if opts.method == "PERF" else "", | ||
| ], | ||
| stdout=subprocess.PIPE, | ||
| stderr=subprocess.STDOUT, | ||
| text=True, | ||
| ) | ||
| ] | ||
| print("Running: " + " ".join(args)) | ||
| process = subprocess.run( | ||
| args, | ||
| stdout=subprocess.PIPE, | ||
| stderr=subprocess.STDOUT, | ||
| text=True, | ||
| ) | ||
|
|
||
| print(process.args) | ||
| for line in process.stdout: | ||
| sys.stdout.write(line) | ||
| process.check_returncode() | ||
| for line in process.stdout: | ||
| sys.stdout.write(line) | ||
| process.check_returncode() | ||
|
|
||
|
|
||
| commands = { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why condition this on CLANG_LINK_CLANG_DYLIB here? Do we only want to do the optimization if clang is linking against the shared library? Wouldn't library consumers benefit from these optimizations even if clang is linked statically?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In theory, yes. But in this scenario we would need another executable that links against that library to run; otherwise we can't gather the runtime information from the instrumented library.