-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[clang][utils] Add auto mode to reduction script #163282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
12a89a5
cac2faa
81d4fea
f166d35
7cd989e
ac41dd7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,12 +18,22 @@ | |
| import subprocess | ||
| import shlex | ||
| import tempfile | ||
| import shutil | ||
| import multiprocessing | ||
| from enum import StrEnum, auto | ||
| from typing import List, Optional | ||
|
|
||
| verbose = False | ||
| creduce_cmd = None | ||
| clang_cmd = None | ||
| opt_cmd = None | ||
| llc_cmd = None | ||
|
|
||
|
|
||
| class FailureType(StrEnum): | ||
| FrontEnd = auto() | ||
| MiddleEnd = auto() | ||
| BackEnd = auto() | ||
| Unknown = auto() | ||
|
|
||
|
|
||
| def verbose_print(*args, **kwargs): | ||
|
|
@@ -70,6 +80,44 @@ def write_to_script(text, filename): | |
| os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC) | ||
|
|
||
|
|
||
| def extract_opt_level(args_list: List[str]) -> Optional[str]: | ||
| """ | ||
| Finds the last optimization flag (-O...) from a list of arguments. | ||
|
|
||
| Args: | ||
| args_list: A list of string arguments passed to the compiler. | ||
|
|
||
| Returns: | ||
| The last matching optimization flag string if found, otherwise None. | ||
| """ | ||
| valid_opt_flags = {"-O0", "-O1", "-O2", "-O3", "-Os", "-Oz", "-Og", "-Ofast"} | ||
|
|
||
| # Iterate in reverse to find the last occurrence | ||
| for arg in reversed(args_list): | ||
| if arg in valid_opt_flags: | ||
| return arg | ||
| return None | ||
|
|
||
|
|
||
| def remove_first_line(file_path): | ||
| """ | ||
| Removes the first line from a specified file. | ||
| """ | ||
| try: | ||
| with open(file_path, "r") as f: | ||
| lines = f.readlines() | ||
|
|
||
| # If the file is not empty, write all lines except the first one back. | ||
| if lines: | ||
| with open(file_path, "w") as f: | ||
| f.writelines(lines[1:]) | ||
|
|
||
| except FileNotFoundError: | ||
| print(f"Error: File '{file_path}' not found.") | ||
| except Exception as e: | ||
| print(f"An error occurred: {e}") | ||
|
|
||
|
|
||
| class Reduce(object): | ||
| def __init__(self, crash_script, file_to_reduce, creduce_flags): | ||
| crash_script_name, crash_script_ext = os.path.splitext(crash_script) | ||
|
|
@@ -85,6 +133,9 @@ def __init__(self, crash_script, file_to_reduce, creduce_flags): | |
| self.expected_output = [] | ||
| self.needs_stack_trace = False | ||
| self.creduce_flags = ["--tidy"] + creduce_flags | ||
| self.opt = opt_cmd | ||
| self.llc = llc_cmd | ||
| self.ir_file = "crash.ll" | ||
|
|
||
| self.read_clang_args(crash_script, file_to_reduce) | ||
| self.read_expected_output() | ||
|
|
@@ -186,22 +237,30 @@ def skip_function(func_name): | |
|
|
||
| self.expected_output = result | ||
|
|
||
| def check_expected_output(self, args=None, filename=None): | ||
| def check_expected_output(self, cmd=None, args=None, filename=None): | ||
| if not cmd: | ||
|
||
| cmd = self.clang | ||
| if not args: | ||
| args = self.clang_args | ||
| if not filename: | ||
| filename = self.file_to_reduce | ||
|
|
||
| p = subprocess.Popen( | ||
| self.get_crash_cmd(args=args, filename=filename), | ||
| self.get_crash_cmd(cmd=cmd, args=args, filename=filename), | ||
| stdout=subprocess.PIPE, | ||
| stderr=subprocess.STDOUT, | ||
| ) | ||
| crash_output, _ = p.communicate() | ||
| return all(msg in crash_output.decode("utf-8") for msg in self.expected_output) | ||
|
|
||
| def write_interestingness_test(self): | ||
| def write_interestingness_test(self, cmd=None, use_llvm_reduce=False): | ||
| print("\nCreating the interestingness test...") | ||
| if not cmd: | ||
| cmd = self.get_crash_cmd() | ||
|
|
||
| # llvm-reduce interestingness tests take the file as the first argument. | ||
| # NOTE: this cannot be escaped by quote_cmd(), since it needs expansion. | ||
| filename = '< "$1"' if use_llvm_reduce else "" | ||
|
|
||
| # Disable symbolization if it's not required to avoid slow symbolization. | ||
| disable_symbolization = "" | ||
|
|
@@ -210,32 +269,39 @@ def write_interestingness_test(self): | |
|
|
||
| output = """#!/bin/bash | ||
| %s | ||
| if %s >& t.log ; then | ||
| if %s %s >& t.log ; then | ||
| exit 1 | ||
| fi | ||
| """ % ( | ||
| disable_symbolization, | ||
| quote_cmd(self.get_crash_cmd()), | ||
| quote_cmd(cmd), | ||
| filename, | ||
| ) | ||
|
|
||
| for msg in self.expected_output: | ||
| output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg) | ||
|
|
||
| write_to_script(output, self.testfile) | ||
| self.check_interestingness() | ||
| self.check_interestingness(cmd, use_llvm_reduce=use_llvm_reduce) | ||
|
|
||
| def check_interestingness(self): | ||
| testfile = os.path.abspath(self.testfile) | ||
| def check_interestingness(self, cmd, use_llvm_reduce=False): | ||
| test_cmd = [os.path.abspath(self.testfile)] | ||
|
|
||
| # llvm-reduce interestingness tests take the file as the first arg. | ||
| if use_llvm_reduce: | ||
| test_cmd += [self.ir_file] | ||
| # Check that the test considers the original file interesting | ||
| returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL) | ||
| if returncode: | ||
| result = subprocess.run(args=test_cmd, stdout=subprocess.DEVNULL) | ||
| if result.returncode: | ||
| sys.exit("The interestingness test does not pass for the original file.") | ||
|
|
||
| # Check that an empty file is not interesting | ||
| # Instead of modifying the filename in the test file, just run the command | ||
| with tempfile.NamedTemporaryFile() as empty_file: | ||
| is_interesting = self.check_expected_output(filename=empty_file.name) | ||
| new_args = cmd[1:] if use_llvm_reduce else cmd[1:-1] | ||
| is_interesting = self.check_expected_output( | ||
| cmd=cmd[0], args=new_args, filename=empty_file.name | ||
| ) | ||
| if is_interesting: | ||
| sys.exit("The interestingness test passes for an empty file.") | ||
|
|
||
|
|
@@ -424,11 +490,129 @@ def run_creduce(self): | |
| print("\n\nctrl-c detected, killed reduction tool") | ||
| p.kill() | ||
|
|
||
| def run_llvm_reduce(self): | ||
| full_llvm_reduce_cmd = [ | ||
| llvm_reduce_cmd, | ||
| f"--test={self.testfile}", | ||
| self.ir_file, | ||
| ] | ||
| print("\nRunning llvm-reduce tool...") | ||
| verbose_print(quote_cmd(full_llvm_reduce_cmd)) | ||
| try: | ||
| p = subprocess.Popen(full_llvm_reduce_cmd) | ||
| p.communicate() | ||
| except KeyboardInterrupt: | ||
| print("\n\nctrl-c detected, killed reduction tool") | ||
aeubanks marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| p.kill() | ||
|
|
||
| def classify_crash(self) -> FailureType: | ||
| print("Classifying crash ...") | ||
| if self.check_expected_output(args=self.clang_args + ["-fsyntax-only"]): | ||
| print("Found Frontend Crash") | ||
| return FailureType.FrontEnd | ||
|
|
||
| print("Found Middle/Backend failure") | ||
|
|
||
| self.opt_level = extract_opt_level(self.clang_args) or "-O2" | ||
| backend_result = self.check_backend() | ||
| if backend_result == FailureType.BackEnd: | ||
| return backend_result | ||
|
|
||
| # Try running w/ -emit-llvm to generate an IR file, | ||
|
||
| # if it succeeds we have a backend failure, and can use llc. | ||
| if not self.check_expected_output( | ||
| args=self.clang_args + ["-emit-llvm", "-o", self.ir_file] | ||
| ): | ||
| print("Checking llc for failure") | ||
| if self.check_expected_output( | ||
| cmd=self.llc, | ||
| args=[self.opt_level, "-filetype=obj"], | ||
| filename=self.ir_file, | ||
| ): | ||
| print("Found BackEnd Crash") | ||
| return FailureType.BackEnd | ||
| elif os.path.exists(self.ir_file): | ||
| # clean up the IR file if we generated it, but won't use it. | ||
| print(f"clean up {self.ir_file}, since we can't repro w/ llc") | ||
| os.remove(self.ir_file) | ||
|
|
||
| # The IR file will be in 'self.ir_file' | ||
| self.extract_crashing_ir() | ||
| return self.check_middle_end() | ||
|
|
||
| def check_llc_failure(self) -> bool: | ||
| return self.check_expected_output( | ||
| cmd=self.llc, args=[self.opt_level, "-filetype=obj"], filename=self.ir_file | ||
| ) | ||
|
|
||
| def extract_backend_ir(self) -> bool: | ||
| return not self.check_expected_output( | ||
| args=self.clang_args + ["-emit-llvm", "-o", self.ir_file] | ||
| ) | ||
|
|
||
| def check_backend(self) -> Optional[FailureType]: | ||
| # Try running w/ -emit-llvm to generate an IR file, | ||
| # if it succeeds we have a backend failure, and can use llc. | ||
| if self.extract_backend_ir(): | ||
| print("Checking llc for failure") | ||
| if self.check_llc_failure(): | ||
| print("Found BackEnd Crash") | ||
| return FailureType.BackEnd | ||
| elif os.path.exists(self.ir_file): | ||
| # clean up the IR file if we generated it, but won't use it. | ||
| print(f"clean up {self.ir_file}, since we can't repro w/ llc") | ||
| os.remove(self.ir_file) | ||
|
|
||
| def extract_crashing_ir(self): | ||
| """ | ||
| Extract IR just before the crash using --print-on-crash | ||
| """ | ||
| args = self.clang_args + [ | ||
| "-mllvm", | ||
| "--print-on-crash", | ||
|
||
| "-mllvm", | ||
| f"--print-on-crash-path={self.ir_file}", | ||
| "-mllvm", | ||
| "--print-module-scope", | ||
| ] | ||
|
|
||
| if not self.check_expected_output(args=args): | ||
| sys.exit("The interestingness test does not pass with '--print-on-crash'.") | ||
|
|
||
| # The output from --print-on-crash has an invalid first line (pass name). | ||
| remove_first_line(self.ir_file) | ||
|
||
|
|
||
| def check_middle_end(self) -> FailureType: | ||
| # TODO: parse the exact pass from the backtrace and set the pass | ||
| # pipeline directly via -passes="...". | ||
| print("Checking opt for failure") | ||
| if self.check_expected_output( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. running the entire pipeline on the IR at the point of the crash isn't precisely matching what the crashing pass would see since we're running all the passes up to that point twice ideally we'd go through a couple of steps:
and similarly for llc reduction it should run on the output post-optimization ( if you want to push this off to another PR, please add a TODO
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I'd like to have something more sophisticated. I have a TODO around line 640, but maybe that's better here. The backtrace seems to contain the exact pass spelling that things crash on, so I think it will be not that hard to adapt, but I wanted something we could iterate on w/o worrying about every detail. But, I agree there's lots of room for improvement here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. iiuc
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let me double check, but I think opt definitly still crashes here (at least in my test case). it certainly was in my earlier testing (and I'm pretty sure I confirmed it after I refactored this yesterday). but I'd actually like to get the precise pass from the backtrace and use that to just craft the This is the backtrace I'm seeing locally. I'm not 100% on if any of this is gated on a specific cmake flag we set in a typical build of our toolchain, but having the pass string printed seems like a really easy way to get it to reproduce if we get the IR from -print-on-crash.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i've added a fallback to just extracting the unmodified IR instead of trying to get it just before the crash w/ I tried things w/ a few more cases from the issue tracker, and this seemed to work more reliably/faster. |
||
| cmd=self.opt, | ||
| args=[self.opt_level, "-disable-output"], | ||
| filename=self.ir_file, | ||
| ): | ||
| print("Found MiddleEnd Crash") | ||
| return FailureType.MiddleEnd | ||
| print( | ||
| "Could not automatically detect crash type, falling back to creduce/cvise." | ||
| ) | ||
| return FailureType.Unknown | ||
|
|
||
| def reduce_ir_crash(self, new_cmd: List[str]): | ||
| print("Writing interestingness test...") | ||
| self.write_interestingness_test(cmd=new_cmd, use_llvm_reduce=True) | ||
| print("Starting llvm-reduce with llc test case") | ||
| self.run_llvm_reduce() | ||
| print("Done Reducing IR file.") | ||
|
|
||
|
|
||
| def main(): | ||
| global verbose | ||
| global creduce_cmd | ||
| global llvm_reduce_cmd | ||
| global clang_cmd | ||
| global opt_cmd | ||
| global llc_cmd | ||
|
|
||
| parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter) | ||
| parser.add_argument( | ||
|
|
@@ -450,20 +634,50 @@ def main(): | |
| help="The path to the `clang` executable. " | ||
| "By default uses the llvm-bin directory.", | ||
| ) | ||
| parser.add_argument( | ||
| "--opt", | ||
| dest="opt", | ||
| type=str, | ||
| help="The path to the `opt` executable. " | ||
| "By default uses the llvm-bin directory.", | ||
| ) | ||
| parser.add_argument( | ||
| "--llc", | ||
| dest="llc", | ||
| type=str, | ||
| help="The path to the `llc` executable. " | ||
| "By default uses the llvm-bin directory.", | ||
| ) | ||
| parser.add_argument( | ||
| "--creduce", | ||
| dest="creduce", | ||
| type=str, | ||
| help="The path to the `creduce` or `cvise` executable. " | ||
| "Required if neither `creduce` nor `cvise` are on PATH.", | ||
| ) | ||
| parser.add_argument( | ||
| "--llvm-reduce", | ||
| dest="llvm_reduce", | ||
| type=str, | ||
| help="The path to the `llvm-reduce` executable. " | ||
| "By default uses the llvm-bin directory.", | ||
| ) | ||
| parser.add_argument( | ||
| "--auto", | ||
| action="store_true", | ||
| help="Use auto reduction mode, that uses `creduce`/`cvise`" | ||
| "for frontend crashes and llvm-reduce for middle/backend crashes.", | ||
| ) | ||
| parser.add_argument("-v", "--verbose", action="store_true") | ||
| args, creduce_flags = parser.parse_known_args() | ||
| verbose = args.verbose | ||
| llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None | ||
| creduce_cmd = check_cmd("creduce", None, args.creduce) | ||
| creduce_cmd = check_cmd("cvise", None, args.creduce) | ||
| llvm_reduce_cmd = check_cmd("llvm-reduce", llvm_bin, args.llvm_reduce) | ||
| clang_cmd = check_cmd("clang", llvm_bin, args.clang) | ||
| opt_cmd = check_cmd("opt", llvm_bin, args.opt) | ||
| llc_cmd = check_cmd("llc", llvm_bin, args.llc) | ||
|
|
||
| crash_script = check_file(args.crash_script[0]) | ||
| file_to_reduce = check_file(args.file_to_reduce[0]) | ||
|
|
@@ -472,6 +686,20 @@ def main(): | |
| creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))] | ||
|
|
||
| r = Reduce(crash_script, file_to_reduce, creduce_flags) | ||
| if args.auto: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe we only try to reduce in opt/llc if those binaries are provided to us, rather than on the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to gate everything behind a flag to start and be sure the default flow wasn't changed. Happy to modify the mechanism if we're by and large happy w/ the direction of this patch.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you're OK w/ it, I'd like to keep the |
||
| crash_type = r.classify_crash() | ||
| match crash_type: | ||
| case FailureType.FrontEnd | FailureType.Unknown: | ||
| print("Starting reduction with creduce/cvise") | ||
| pass | ||
| case FailureType.MiddleEnd: | ||
| new_cmd = [opt_cmd, "-disable-output", r.opt_level] | ||
| r.reduce_ir_crash(new_cmd) | ||
| return | ||
| case FailureType.BackEnd: | ||
| new_cmd = [llc_cmd, "-filetype=obj", r.opt_level] | ||
| r.reduce_ir_crash(new_cmd) | ||
| return | ||
|
|
||
| r.simplify_clang_args() | ||
| r.write_interestingness_test() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llc/opt won't recognize -Og/-Ofast
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
crap, so much for simple.