123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- #!/usr/bin/env python
- """Calls C-Reduce to create a minimal reproducer for clang crashes.
- Output files:
- *.reduced.sh -- crash reproducer with minimal arguments
- *.reduced.cpp -- the reduced file
- *.test.sh -- interestingness test for C-Reduce
- """
- from __future__ import print_function
- from argparse import ArgumentParser, RawTextHelpFormatter
- import os
- import re
- import stat
- import sys
- import subprocess
- import pipes
- import shlex
- import tempfile
- import shutil
- from distutils.spawn import find_executable
# Process-wide settings; main() fills these in from the command line.
verbose = False  # when True, verbose_print() forwards to print()
# Paths to the external tools; unset until main() resolves them.
creduce_cmd = clang_cmd = not_cmd = None
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
def check_file(fname):
    """Return fname unchanged if it names an existing regular file.

    Exits the program with an error message otherwise.
    """
    if os.path.isfile(fname):
        return fname
    sys.exit("ERROR: %s does not exist" % (fname))
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """Locate a required executable and return its absolute path.

    If cmd_path is given, only it is looked up (as a path, or as a name on
    $PATH). Otherwise cmd_name is searched for in cmd_dir, or on $PATH when
    cmd_dir is empty/None.

    Exits the program with an error message when nothing is found.
    """
    # Prefer shutil.which (Python 3.3+); distutils' find_executable is only
    # needed on interpreters that predate it. The `or` short-circuits, so
    # find_executable is never touched when shutil.which exists.
    locate = getattr(shutil, 'which', None) or find_executable

    if cmd_path:
        cmd = locate(cmd_path)
        if cmd:
            # The lookup may hand back a relative path; the resolved path is
            # later written into the interestingness test script, so make it
            # absolute as the contract promises.
            return os.path.abspath(cmd)
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = locate(cmd_name, path=cmd_dir)
    if cmd:
        return os.path.abspath(cmd)

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
def quote_cmd(cmd):
    """Return the argument list `cmd` as one shell-escaped command string."""
    # shlex.quote is the supported spelling on Python 3.3+; pipes.quote is
    # only used on interpreters without it. The `or` short-circuits, so the
    # `pipes` module is not touched when shlex.quote exists.
    quote = getattr(shlex, 'quote', None) or pipes.quote
    return ' '.join(quote(arg) for arg in cmd)
def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-execute bit to the file."""
    with open(filename, 'w') as script:
        script.write(text)
    # Keep the existing permission bits and OR in owner-execute.
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
class Reduce(object):
    """State and steps for reducing one clang crash reproducer.

    Attributes:
      testfile: path of the generated interestingness test (*.test.sh).
      crash_script: path the reduced crash command is written to.
      file_to_reduce: working copy of the input file (*.reduced.<ext>).
      clang: clang executable, taken from the module-level `clang_cmd`.
      clang_args: clang arguments without the executable and input file.
      expected_output: strings that must all appear in clang's output for a
        candidate to count as reproducing the crash.
      is_crash: False when the failure was matched as a "fatal error:"
        message; controls whether `--crash` is passed to `not`.
      creduce_flags: extra flags passed to C-Reduce.
    """

    def __init__(self, crash_script, file_to_reduce):
        """Copy the input file, then read the clang args and crash output."""
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # All later steps operate on the copy, never on the original input.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.is_crash = True
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the clang invocation as a list: [cmd] + args + [filename].

        Omitted pieces default to the values stored on the instance.
        """
        if not cmd:
            cmd = self.clang
        # Compare against None: an explicitly empty argument list is a valid
        # request ("run with no extra arguments") and must not be replaced.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from the crash script.

        The first line that is neither blank nor a `#` comment is taken to be
        the clang call. Its first word (the executable) and the last
        occurrence of `filename` are dropped; the rest becomes
        self.clang_args. Exits if no such line exists.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            for line in f:
                if line.lstrip().startswith('#'):
                    continue
                cmd = shlex.split(line)
                if cmd:
                    break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Sets self.expected_output to either the captured part of the first
        matching known error message, or up to five function names from the
        stack trace. Sets self.is_crash to False when the match came from the
        "fatal error:" pattern. Exits if nothing identifying is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))

        # Look for specific error messages
        regexes = [r"Assertion `(.+)' failed",  # Linux assert()
                   r"Assertion failed: (.+),",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: (.+)",
                   r"LLVM ERROR: (.+)",
                   r"UNREACHABLE executed (at .+)?!",
                   r"LLVM IR generation of declaration '(.+)'",
                   r"Generating code for declaration '(.+)'",
                   r"\*\*\* Bad machine code: (.+) \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                # Group 1 is optional in the UNREACHABLE pattern and can be
                # None; fall back to the whole match so the message is
                # always a string.
                msg = match.group(1) or match.group(0)
                result = [msg]
                print("Found message:", msg)

                if "fatal error:" in msg_re:
                    self.is_crash = False
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTraceSignalHandler",
                       "llvm::sys::RunSignalHandlers",
                       "SignalHandler", "__restore_rt", "gsignal", "abort"]
            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches
                      if x and x.strip() not in filters][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if clang's output contains every expected string.

        Runs clang with `args` and `filename`, each defaulting to the value
        stored on the instance when omitted.
        """
        # `is None` so that an empty candidate argument list is really
        # tested, instead of silently re-testing the stored arguments.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        return all(msg in crash_output.decode('utf-8') for msg in
                   self.expected_output)

    def write_interestingness_test(self):
        """Write the bash interestingness test and sanity-check it.

        The script runs the crash command through `not` (with `--crash` when
        self.is_crash), logs to t.log, and greps the log for every expected
        string.
        """
        print("\nCreating the interestingness test...")
        # Prefer shlex.quote; pipes.quote is only used where it is missing.
        quote = getattr(shlex, 'quote', None) or pipes.quote

        crash_flag = "--crash" if self.is_crash else ""

        output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
            (quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            output += 'grep -F %s t.log || exit 1\n' % quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes on the input and fails on an empty file."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the
        # command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Try to replace the working file with its preprocessed form.

        Preprocessing is tried first with `-P` (no line markers) and then
        without it. The result is kept only if it still produces the
        expected output; otherwise the working file is left untouched.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of `args` with the selected options removed.

        opts_equal: drop arguments equal to any of these.
        opts_startswith: drop arguments starting with any of these.
        opts_one_arg_startswith: drop arguments starting with any of these
          together with the argument that follows them.
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` and keep the result only if the crash still occurs.

        `kwargs` are forwarded to filter_args(). If `extra_arg` is given it
        is moved to (or added at) the end of the candidate list. Returns the
        candidate list when it differs from `args` and still yields the
        expected output, otherwise returns `args` unchanged.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping args[index] (and a following value, if any).

        Returns (new_args, index) when the crash still reproduces without
        the argument, so the caller re-examines the same position; otherwise
        returns (args, index + 1).
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time
        the interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from
        # occurring after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced optimization level with -O0",
                                        extra_arg="-O0",
                                        opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args,
                                        msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the
        smallest command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one position at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the working file with the interestingness test."""
        print("\nRunning C-Reduce...")
        # Start the process outside the try block so `p` is always bound
        # when the interrupt handler runs.
        p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                             [self.testfile, self.file_to_reduce])
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            p.kill()
def main():
    """Parse the command line, resolve the tools, and run every reduction step."""
    global verbose, creduce_cmd, clang_cmd, not_cmd

    arg_parser = ArgumentParser(description=__doc__,
                                formatter_class=RawTextHelpFormatter)
    arg_parser.add_argument('crash_script', type=str, nargs=1,
                            help="Name of the script that generates the crash.")
    arg_parser.add_argument('file_to_reduce', type=str, nargs=1,
                            help="Name of the file to be reduced.")
    arg_parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                            help="Path to the LLVM bin directory.")
    arg_parser.add_argument('--llvm-not', dest='llvm_not', type=str,
                            help="The path to the `not` executable. "
                            "By default uses the llvm-bin directory.")
    arg_parser.add_argument('--clang', dest='clang', type=str,
                            help="The path to the `clang` executable. "
                            "By default uses the llvm-bin directory.")
    arg_parser.add_argument('--creduce', dest='creduce', type=str,
                            help="The path to the `creduce` executable. "
                            "Required if `creduce` is not in PATH environment.")
    arg_parser.add_argument('-v', '--verbose', action='store_true')
    opts = arg_parser.parse_args()

    verbose = opts.verbose

    # Without --llvm-bin the tools are looked up with no directory given.
    llvm_bin = None
    if opts.llvm_bin:
        llvm_bin = os.path.abspath(opts.llvm_bin)

    creduce_cmd = check_cmd('creduce', None, opts.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, opts.clang)
    not_cmd = check_cmd('not', llvm_bin, opts.llvm_not)

    # nargs=1 yields one-element lists, hence the [0].
    crash_script = check_file(opts.crash_script[0])
    file_to_reduce = check_file(opts.file_to_reduce[0])

    reducer = Reduce(crash_script, file_to_reduce)

    # Order matters: shrink the command first so every later test run is
    # cheaper, and minimize the arguments last, against the reduced file.
    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()


if __name__ == '__main__':
    main()
|