Skip to content

Commit 12a89a5

Browse files
committed
[clang][utils] Add auto mode to reduction script
The existing script allows us to run creduce/cvise automatically on clang reproducers. However, llvm-reduce is much more efficient for middle-end or backend issues. Early in the script, we can determine in which phase the crash happens and choose the best tool for it. For now, we only support simple test scripts for opt and llc, but this should cover many more cases. This feature is behind a new flag, `--auto`; the default behavior of the script is otherwise unchanged.
1 parent 672672b commit 12a89a5

File tree

1 file changed

+189
-12
lines changed

1 file changed

+189
-12
lines changed

clang/utils/reduce-clang-crash.py

Lines changed: 189 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,22 @@
1818
import subprocess
1919
import shlex
2020
import tempfile
21-
import shutil
2221
import multiprocessing
22+
from enum import StrEnum, auto
23+
from typing import List, Optional
2324

2425
verbose = False
2526
creduce_cmd = None
2627
clang_cmd = None
28+
opt_cmd = None
29+
llc_cmd = None
30+
31+
32+
class FailureType(StrEnum):
33+
FrontEnd = auto()
34+
MiddleEnd = auto()
35+
BackEnd = auto()
36+
Unknown = auto()
2737

2838

2939
def verbose_print(*args, **kwargs):
@@ -70,6 +80,44 @@ def write_to_script(text, filename):
7080
os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
7181

7282

83+
def extract_opt_level(args_list: List[str]) -> Optional[str]:
    """Return the last optimization flag (``-O...``) in a compiler command.

    Only exact matches against the recognized spellings count; the arguments
    are scanned from the end so that the right-most flag is the one reported.

    Args:
        args_list: Arguments passed to the compiler.

    Returns:
        The right-most recognized optimization flag, or None if there is none.
    """
    known_levels = {"-O0", "-O1", "-O2", "-O3", "-Os", "-Oz", "-Og", "-Ofast"}
    candidates = (flag for flag in reversed(args_list) if flag in known_levels)
    return next(candidates, None)
100+
101+
102+
def remove_first_line(file_path):
    """Drop the first line of ``file_path``, rewriting the file in place.

    An empty file is left untouched. Failures are reported on stdout instead
    of being raised, so the function returns None in every case.
    """
    try:
        with open(file_path, "r") as src:
            contents = src.read()

        # Nothing to strip from an empty file, so skip the rewrite entirely.
        if contents:
            # Whatever follows the first newline is what remains; a file that
            # holds a single unterminated line ends up empty.
            _, _, remainder = contents.partition("\n")
            with open(file_path, "w") as dst:
                dst.write(remainder)

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
119+
120+
73121
class Reduce(object):
74122
def __init__(self, crash_script, file_to_reduce, creduce_flags):
75123
crash_script_name, crash_script_ext = os.path.splitext(crash_script)
@@ -85,6 +133,9 @@ def __init__(self, crash_script, file_to_reduce, creduce_flags):
85133
self.expected_output = []
86134
self.needs_stack_trace = False
87135
self.creduce_flags = ["--tidy"] + creduce_flags
136+
self.opt = opt_cmd
137+
self.llc = llc_cmd
138+
self.ir_file = "crash.ll"
88139

89140
self.read_clang_args(crash_script, file_to_reduce)
90141
self.read_expected_output()
@@ -186,22 +237,30 @@ def skip_function(func_name):
186237

187238
self.expected_output = result
188239

189-
def check_expected_output(self, cmd=None, args=None, filename=None):
    """Run a crash command and report whether it reproduces the expected crash.

    Args:
        cmd: Executable to run; ``self.clang`` is used when this is falsy.
        args: Arguments for the executable; ``self.clang_args`` is used when
            this is falsy (an explicit empty list therefore also selects the
            default).
        filename: Input file; ``self.file_to_reduce`` is used when falsy.

    Returns:
        True when every message in ``self.expected_output`` occurs in the
        combined stdout/stderr of the command, otherwise False.
    """
    cmd = cmd or self.clang
    args = args or self.clang_args
    filename = filename or self.file_to_reduce

    completed = subprocess.run(
        self.get_crash_cmd(cmd=cmd, args=args, filename=filename),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    # Decode once rather than once per expected message, and tolerate bytes
    # that are not valid UTF-8: crash output may contain arbitrary data, and a
    # decoding error here would abort the whole reduction.
    crash_output = completed.stdout.decode("utf-8", errors="replace")
    return all(msg in crash_output for msg in self.expected_output)
202255

203-
def write_interestingness_test(self):
256+
def write_interestingness_test(self, cmd=None, use_llvm_reduce=False):
204257
print("\nCreating the interestingness test...")
258+
if not cmd:
259+
cmd = self.get_crash_cmd()
260+
261+
# llvm-reduce interestingness tests take the file as the first argument.
262+
# NOTE: this cannot be escaped by quote_cmd(), since it needs expansion.
263+
filename = '< "$1"' if use_llvm_reduce else ""
205264

206265
# Disable symbolization if it's not required to avoid slow symbolization.
207266
disable_symbolization = ""
@@ -210,32 +269,39 @@ def write_interestingness_test(self):
210269

211270
output = """#!/bin/bash
212271
%s
213-
if %s >& t.log ; then
272+
if %s %s >& t.log ; then
214273
exit 1
215274
fi
216275
""" % (
217276
disable_symbolization,
218-
quote_cmd(self.get_crash_cmd()),
277+
quote_cmd(cmd),
278+
filename,
219279
)
220280

221281
for msg in self.expected_output:
222282
output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)
223283

224284
write_to_script(output, self.testfile)
225-
self.check_interestingness()
285+
self.check_interestingness(cmd, use_llvm_reduce=use_llvm_reduce)
226286

227-
def check_interestingness(self):
228-
testfile = os.path.abspath(self.testfile)
287+
def check_interestingness(self, cmd, use_llvm_reduce=False):
288+
test_cmd = [os.path.abspath(self.testfile)]
229289

290+
# llvm-reduce interestingness tests take the file as the first arg.
291+
if use_llvm_reduce:
292+
test_cmd += [self.ir_file]
230293
# Check that the test considers the original file interesting
231-
returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL)
232-
if returncode:
294+
result = subprocess.run(args=test_cmd, stdout=subprocess.DEVNULL)
295+
if result.returncode:
233296
sys.exit("The interestingness test does not pass for the original file.")
234297

235298
# Check that an empty file is not interesting
236299
# Instead of modifying the filename in the test file, just run the command
237300
with tempfile.NamedTemporaryFile() as empty_file:
238-
is_interesting = self.check_expected_output(filename=empty_file.name)
301+
new_args = cmd[1:] if use_llvm_reduce else cmd[1:-1]
302+
is_interesting = self.check_expected_output(
303+
cmd=cmd[0], args=new_args, filename=empty_file.name
304+
)
239305
if is_interesting:
240306
sys.exit("The interestingness test passes for an empty file.")
241307

@@ -424,11 +490,76 @@ def run_creduce(self):
424490
print("\n\nctrl-c detected, killed reduction tool")
425491
p.kill()
426492

493+
def run_llvm_reduce(self):
    """Run llvm-reduce on ``self.ir_file`` with ``self.testfile`` as the test.

    The tool's path comes from the module-level ``llvm_reduce_cmd``, which
    ``main()`` sets. Its output is not captured. A ctrl-c while waiting kills
    the tool instead of propagating the interrupt.
    """
    full_llvm_reduce_cmd = [
        llvm_reduce_cmd,
        f"--test={self.testfile}",
        self.ir_file,
    ]
    print("\nRunning llvm-reduce tool...")
    verbose_print(quote_cmd(full_llvm_reduce_cmd))
    # Start the process outside the try block so that `p` is always bound when
    # the KeyboardInterrupt handler runs.
    p = subprocess.Popen(full_llvm_reduce_cmd)
    try:
        p.communicate()
    except KeyboardInterrupt:
        # Make sure llvm-reduce does not outlive the script, then reap it.
        print("\n\nctrl-c detected, killed reduction tool")
        p.kill()
        p.wait()
508+
509+
def classify_crash(self) -> FailureType:
    """Determine which compilation phase reproduces the expected crash.

    The checks run in order: clang with ``-fsyntax-only`` (frontend), then
    ``opt`` and ``llc`` on the IR that clang dumps via ``--print-on-crash``.

    Side effects: removes and re-creates ``self.ir_file``, and sets
    ``self.opt_level`` (the last ``-O`` flag in ``self.clang_args``, or
    ``-O2``) once the frontend check has been ruled out. Exits the script if
    the crash no longer reproduces with the ``--print-on-crash`` flags added.

    Returns:
        The FailureType of the first check that reproduces the crash, or
        FailureType.Unknown when none of them does.
    """
    print("classifying crash ...")
    if self.check_expected_output(args=self.clang_args + ["-fsyntax-only"]):
        print("Found Frontend Crash")
        return FailureType.FrontEnd

    print("Found Middle/Backend failure")

    # Remove IR left behind by an earlier run so that a stale file is never
    # mistaken for this crash's dump if clang does not write a new one.
    try:
        os.remove(self.ir_file)
    except FileNotFoundError:
        pass

    args = self.clang_args + [
        "-mllvm",
        "--print-on-crash",
        "-mllvm",
        f"--print-on-crash-path={self.ir_file}",
        "-mllvm",
        "--print-module-scope",
    ]

    if not self.check_expected_output(args=args):
        sys.exit("The interestingness test does not pass with '--print-on-crash'.")

    # The output from --print-on-crash has an invalid first line (pass name).
    remove_first_line(self.ir_file)

    self.opt_level = extract_opt_level(self.clang_args) or "-O2"

    # NOTE(review): the same -O flag is handed to both opt and llc; confirm
    # that both tools accept every spelling extract_opt_level() can return.
    if self.check_expected_output(
        cmd=self.opt,
        args=[self.opt_level, "-disable-output"],
        filename=self.ir_file,
    ):
        print("Found MiddleEnd Crash")
        return FailureType.MiddleEnd
    if self.check_expected_output(
        cmd=self.llc, args=[self.opt_level], filename=self.ir_file
    ):
        print("Found BackEnd Crash")
        return FailureType.BackEnd
    print("Found Unknown Crash Type. Falling back to creduce")
    return FailureType.Unknown
547+
548+
def reduce_ir_crash(self, new_cmd: List[str]):
    """Reduce the dumped IR file with llvm-reduce.

    Writes an llvm-reduce style interestingness test around ``new_cmd`` and
    then runs llvm-reduce with it.

    Args:
        new_cmd: The crashing command without the input file, executable
            first (for example the opt or llc invocation).
    """
    print("Writing interestingness test...")
    self.write_interestingness_test(cmd=new_cmd, use_llvm_reduce=True)
    # Name the tool that is actually under test; the command is not always
    # an llc invocation.
    tool = os.path.basename(new_cmd[0]) if new_cmd else "default"
    print(f"Starting llvm-reduce with {tool} test case")
    self.run_llvm_reduce()
    print("Done Reducing IR file.")
554+
427555

428556
def main():
429557
global verbose
430558
global creduce_cmd
559+
global llvm_reduce_cmd
431560
global clang_cmd
561+
global opt_cmd
562+
global llc_cmd
432563

433564
parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
434565
parser.add_argument(
@@ -450,20 +581,50 @@ def main():
450581
help="The path to the `clang` executable. "
451582
"By default uses the llvm-bin directory.",
452583
)
584+
parser.add_argument(
585+
"--opt",
586+
dest="opt",
587+
type=str,
588+
help="The path to the `opt` executable. "
589+
"By default uses the llvm-bin directory.",
590+
)
591+
parser.add_argument(
592+
"--llc",
593+
dest="llc",
594+
type=str,
595+
help="The path to the `llc` executable. "
596+
"By default uses the llvm-bin directory.",
597+
)
453598
parser.add_argument(
454599
"--creduce",
455600
dest="creduce",
456601
type=str,
457602
help="The path to the `creduce` or `cvise` executable. "
458603
"Required if neither `creduce` nor `cvise` are on PATH.",
459604
)
605+
parser.add_argument(
606+
"--llvm-reduce",
607+
dest="llvm_reduce",
608+
type=str,
609+
help="The path to the `llvm-reduce` executable. "
610+
"By default uses the llvm-bin directory.",
611+
)
612+
parser.add_argument(
613+
"--auto",
614+
action="store_true",
615+
help="Use auto reduction mode, that uses `creduce`/`cvise`"
616+
"for frontend crashes and llvm-reduce for middle/backend crashes.",
617+
)
460618
parser.add_argument("-v", "--verbose", action="store_true")
461619
args, creduce_flags = parser.parse_known_args()
462620
verbose = args.verbose
463621
llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
464622
creduce_cmd = check_cmd("creduce", None, args.creduce)
465623
creduce_cmd = check_cmd("cvise", None, args.creduce)
624+
llvm_reduce_cmd = check_cmd("llvm-reduce", llvm_bin, args.llvm_reduce)
466625
clang_cmd = check_cmd("clang", llvm_bin, args.clang)
626+
opt_cmd = check_cmd("opt", llvm_bin, args.opt)
627+
llc_cmd = check_cmd("llc", llvm_bin, args.llc)
467628

468629
crash_script = check_file(args.crash_script[0])
469630
file_to_reduce = check_file(args.file_to_reduce[0])
@@ -472,6 +633,22 @@ def main():
472633
creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))]
473634

474635
r = Reduce(crash_script, file_to_reduce, creduce_flags)
636+
if args.auto:
637+
crash_type = r.classify_crash()
638+
match crash_type:
639+
case FailureType.FrontEnd | FailureType.Unknown:
640+
print("Starting reduction with creduce/cvise")
641+
pass
642+
case FailureType.MiddleEnd:
643+
# TODO: parse the exact pass from the backtrace and set the
644+
# pass pipeline directly.
645+
new_cmd = [opt_cmd, "-disable-output", r.opt_level]
646+
r.reduce_ir_crash(new_cmd)
647+
return
648+
case FailureType.BackEnd:
649+
new_cmd = [llc_cmd, r.opt_level]
650+
r.reduce_ir_crash(new_cmd)
651+
return
475652

476653
r.simplify_clang_args()
477654
r.write_interestingness_test()

0 commit comments

Comments
 (0)