creduce-clang-crash.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. #!/usr/bin/env python
  2. """Calls C-Reduce to create a minimal reproducer for clang crashes.
  3. Output files:
  4. *.reduced.sh -- crash reproducer with minimal arguments
  5. *.reduced.cpp -- the reduced file
  6. *.test.sh -- interestingness test for C-Reduce
  7. """
  8. from __future__ import print_function
  9. from argparse import ArgumentParser, RawTextHelpFormatter
  10. import os
  11. import re
  12. import stat
  13. import sys
  14. import subprocess
  15. import pipes
  16. import shlex
  17. import tempfile
  18. import shutil
  19. from distutils.spawn import find_executable
  20. verbose = False
  21. creduce_cmd = None
  22. clang_cmd = None
  23. not_cmd = None
  24. def verbose_print(*args, **kwargs):
  25. if verbose:
  26. print(*args, **kwargs)
  27. def check_file(fname):
  28. if not os.path.isfile(fname):
  29. sys.exit("ERROR: %s does not exist" % (fname))
  30. return fname
  31. def check_cmd(cmd_name, cmd_dir, cmd_path=None):
  32. """
  33. Returns absolute path to cmd_path if it is given,
  34. or absolute path to cmd_dir/cmd_name.
  35. """
  36. if cmd_path:
  37. cmd = find_executable(cmd_path)
  38. if cmd:
  39. return cmd
  40. sys.exit("ERROR: executable `%s` not found" % (cmd_path))
  41. cmd = find_executable(cmd_name, path=cmd_dir)
  42. if cmd:
  43. return cmd
  44. if not cmd_dir:
  45. cmd_dir = "$PATH"
  46. sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
  47. def quote_cmd(cmd):
  48. return ' '.join(pipes.quote(arg) for arg in cmd)
  49. def write_to_script(text, filename):
  50. with open(filename, 'w') as f:
  51. f.write(text)
  52. os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
  53. class Reduce(object):
  54. def __init__(self, crash_script, file_to_reduce):
  55. crash_script_name, crash_script_ext = os.path.splitext(crash_script)
  56. file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
  57. self.testfile = file_reduce_name + '.test.sh'
  58. self.crash_script = crash_script_name + '.reduced' + crash_script_ext
  59. self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
  60. shutil.copy(file_to_reduce, self.file_to_reduce)
  61. self.clang = clang_cmd
  62. self.clang_args = []
  63. self.expected_output = []
  64. self.is_crash = True
  65. self.creduce_flags = ["--tidy"]
  66. self.read_clang_args(crash_script, file_to_reduce)
  67. self.read_expected_output()
  68. def get_crash_cmd(self, cmd=None, args=None, filename=None):
  69. if not cmd:
  70. cmd = self.clang
  71. if not args:
  72. args = self.clang_args
  73. if not filename:
  74. filename = self.file_to_reduce
  75. return [cmd] + args + [filename]
  76. def read_clang_args(self, crash_script, filename):
  77. print("\nReading arguments from crash script...")
  78. with open(crash_script) as f:
  79. # Assume clang call is the first non comment line.
  80. cmd = []
  81. for line in f:
  82. if not line.lstrip().startswith('#'):
  83. cmd = shlex.split(line)
  84. break
  85. if not cmd:
  86. sys.exit("Could not find command in the crash script.");
  87. # Remove clang and filename from the command
  88. # Assume the last occurrence of the filename is the clang input file
  89. del cmd[0]
  90. for i in range(len(cmd)-1, -1, -1):
  91. if cmd[i] == filename:
  92. del cmd[i]
  93. break
  94. self.clang_args = cmd
  95. verbose_print("Clang arguments:", quote_cmd(self.clang_args))
  96. def read_expected_output(self):
  97. print("\nGetting expected crash output...")
  98. p = subprocess.Popen(self.get_crash_cmd(),
  99. stdout=subprocess.PIPE,
  100. stderr=subprocess.STDOUT)
  101. crash_output, _ = p.communicate()
  102. result = []
  103. # Remove color codes
  104. ansi_escape = r'\x1b\[[0-?]*m'
  105. crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))
  106. # Look for specific error messages
  107. regexes = [r"Assertion `(.+)' failed", # Linux assert()
  108. r"Assertion failed: (.+),", # FreeBSD/Mac assert()
  109. r"fatal error: error in backend: (.+)",
  110. r"LLVM ERROR: (.+)",
  111. r"UNREACHABLE executed (at .+)?!",
  112. r"LLVM IR generation of ceclaration '(.+)'",
  113. r"Generating code for declaration '(.+)'",
  114. r"\*\*\* Bad machine code: (.+) \*\*\*"]
  115. for msg_re in regexes:
  116. match = re.search(msg_re, crash_output)
  117. if match:
  118. msg = match.group(1)
  119. result = [msg]
  120. print("Found message:", msg)
  121. if "fatal error:" in msg_re:
  122. self.is_crash = False
  123. break
  124. # If no message was found, use the top five stack trace functions,
  125. # ignoring some common functions
  126. # Five is a somewhat arbitrary number; the goal is to get a small number
  127. # of identifying functions with some leeway for common functions
  128. if not result:
  129. stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
  130. filters = ["PrintStackTraceSignalHandler",
  131. "llvm::sys::RunSignalHandlers",
  132. "SignalHandler", "__restore_rt", "gsignal", "abort"]
  133. matches = re.findall(stacktrace_re, crash_output)
  134. result = [x for x in matches if x and x.strip() not in filters][:5]
  135. for msg in result:
  136. print("Found stack trace function:", msg)
  137. if not result:
  138. print("ERROR: no crash was found")
  139. print("The crash output was:\n========\n%s========" % crash_output)
  140. sys.exit(1)
  141. self.expected_output = result
  142. def check_expected_output(self, args=None, filename=None):
  143. if not args:
  144. args = self.clang_args
  145. if not filename:
  146. filename = self.file_to_reduce
  147. p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
  148. stdout=subprocess.PIPE,
  149. stderr=subprocess.STDOUT)
  150. crash_output, _ = p.communicate()
  151. return all(msg in crash_output.decode('utf-8') for msg in
  152. self.expected_output)
  153. def write_interestingness_test(self):
  154. print("\nCreating the interestingness test...")
  155. crash_flag = "--crash" if self.is_crash else ""
  156. output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
  157. (pipes.quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))
  158. for msg in self.expected_output:
  159. output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg)
  160. write_to_script(output, self.testfile)
  161. self.check_interestingness()
  162. def check_interestingness(self):
  163. testfile = os.path.abspath(self.testfile)
  164. # Check that the test considers the original file interesting
  165. with open(os.devnull, 'w') as devnull:
  166. returncode = subprocess.call(testfile, stdout=devnull)
  167. if returncode:
  168. sys.exit("The interestingness test does not pass for the original file.")
  169. # Check that an empty file is not interesting
  170. # Instead of modifying the filename in the test file, just run the command
  171. with tempfile.NamedTemporaryFile() as empty_file:
  172. is_interesting = self.check_expected_output(filename=empty_file.name)
  173. if is_interesting:
  174. sys.exit("The interestingness test passes for an empty file.")
  175. def clang_preprocess(self):
  176. print("\nTrying to preprocess the source file...")
  177. with tempfile.NamedTemporaryFile() as tmpfile:
  178. cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
  179. cmd_preprocess_no_lines = cmd_preprocess + ['-P']
  180. try:
  181. subprocess.check_call(cmd_preprocess_no_lines)
  182. if self.check_expected_output(filename=tmpfile.name):
  183. print("Successfully preprocessed with line markers removed")
  184. shutil.copy(tmpfile.name, self.file_to_reduce)
  185. else:
  186. subprocess.check_call(cmd_preprocess)
  187. if self.check_expected_output(filename=tmpfile.name):
  188. print("Successfully preprocessed without removing line markers")
  189. shutil.copy(tmpfile.name, self.file_to_reduce)
  190. else:
  191. print("No longer crashes after preprocessing -- "
  192. "using original source")
  193. except subprocess.CalledProcessError:
  194. print("Preprocessing failed")
  195. @staticmethod
  196. def filter_args(args, opts_equal=[], opts_startswith=[],
  197. opts_one_arg_startswith=[]):
  198. result = []
  199. skip_next = False
  200. for arg in args:
  201. if skip_next:
  202. skip_next = False
  203. continue
  204. if any(arg == a for a in opts_equal):
  205. continue
  206. if any(arg.startswith(a) for a in opts_startswith):
  207. continue
  208. if any(arg.startswith(a) for a in opts_one_arg_startswith):
  209. skip_next = True
  210. continue
  211. result.append(arg)
  212. return result
  213. def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
  214. new_args = self.filter_args(args, **kwargs)
  215. if extra_arg:
  216. if extra_arg in new_args:
  217. new_args.remove(extra_arg)
  218. new_args.append(extra_arg)
  219. if (new_args != args and
  220. self.check_expected_output(args=new_args)):
  221. if msg:
  222. verbose_print(msg)
  223. return new_args
  224. return args
  225. def try_remove_arg_by_index(self, args, index):
  226. new_args = args[:index] + args[index+1:]
  227. removed_arg = args[index]
  228. # Heuristic for grouping arguments:
  229. # remove next argument if it doesn't start with "-"
  230. if index < len(new_args) and not new_args[index].startswith('-'):
  231. del new_args[index]
  232. removed_arg += ' ' + args[index+1]
  233. if self.check_expected_output(args=new_args):
  234. verbose_print("Removed", removed_arg)
  235. return new_args, index
  236. return args, index+1
  237. def simplify_clang_args(self):
  238. """Simplify clang arguments before running C-Reduce to reduce the time the
  239. interestingness test takes to run.
  240. """
  241. print("\nSimplifying the clang command...")
  242. # Remove some clang arguments to speed up the interestingness test
  243. new_args = self.clang_args
  244. new_args = self.try_remove_args(new_args,
  245. msg="Removed debug info options",
  246. opts_startswith=["-gcodeview",
  247. "-debug-info-kind=",
  248. "-debugger-tuning="])
  249. new_args = self.try_remove_args(new_args,
  250. msg="Removed --show-includes",
  251. opts_startswith=["--show-includes"])
  252. # Not suppressing warnings (-w) sometimes prevents the crash from occurring
  253. # after preprocessing
  254. new_args = self.try_remove_args(new_args,
  255. msg="Replaced -W options with -w",
  256. extra_arg='-w',
  257. opts_startswith=["-W"])
  258. new_args = self.try_remove_args(new_args,
  259. msg="Replaced optimization level with -O0",
  260. extra_arg="-O0",
  261. opts_startswith=["-O"])
  262. # Try to remove compilation steps
  263. new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
  264. extra_arg="-emit-llvm")
  265. new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
  266. extra_arg="-fsyntax-only")
  267. # Try to make implicit int an error for more sensible test output
  268. new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
  269. opts_equal=["-w"],
  270. extra_arg="-Werror=implicit-int")
  271. self.clang_args = new_args
  272. verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))
  273. def reduce_clang_args(self):
  274. """Minimize the clang arguments after running C-Reduce, to get the smallest
  275. command that reproduces the crash on the reduced file.
  276. """
  277. print("\nReducing the clang crash command...")
  278. new_args = self.clang_args
  279. # Remove some often occurring args
  280. new_args = self.try_remove_args(new_args, msg="Removed -D options",
  281. opts_startswith=["-D"])
  282. new_args = self.try_remove_args(new_args, msg="Removed -D options",
  283. opts_one_arg_startswith=["-D"])
  284. new_args = self.try_remove_args(new_args, msg="Removed -I options",
  285. opts_startswith=["-I"])
  286. new_args = self.try_remove_args(new_args, msg="Removed -I options",
  287. opts_one_arg_startswith=["-I"])
  288. new_args = self.try_remove_args(new_args, msg="Removed -W options",
  289. opts_startswith=["-W"])
  290. # Remove other cases that aren't covered by the heuristic
  291. new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
  292. opts_one_arg_startswith=["-mllvm"])
  293. i = 0
  294. while i < len(new_args):
  295. new_args, i = self.try_remove_arg_by_index(new_args, i)
  296. self.clang_args = new_args
  297. reduced_cmd = quote_cmd(self.get_crash_cmd())
  298. write_to_script(reduced_cmd, self.crash_script)
  299. print("Reduced command:", reduced_cmd)
  300. def run_creduce(self):
  301. print("\nRunning C-Reduce...")
  302. try:
  303. p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
  304. [self.testfile, self.file_to_reduce])
  305. p.communicate()
  306. except KeyboardInterrupt:
  307. # Hack to kill C-Reduce because it jumps into its own pgid
  308. print('\n\nctrl-c detected, killed creduce')
  309. p.kill()
  310. def main():
  311. global verbose
  312. global creduce_cmd
  313. global clang_cmd
  314. global not_cmd
  315. parser = ArgumentParser(description=__doc__,
  316. formatter_class=RawTextHelpFormatter)
  317. parser.add_argument('crash_script', type=str, nargs=1,
  318. help="Name of the script that generates the crash.")
  319. parser.add_argument('file_to_reduce', type=str, nargs=1,
  320. help="Name of the file to be reduced.")
  321. parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
  322. help="Path to the LLVM bin directory.")
  323. parser.add_argument('--llvm-not', dest='llvm_not', type=str,
  324. help="The path to the `not` executable. "
  325. "By default uses the llvm-bin directory.")
  326. parser.add_argument('--clang', dest='clang', type=str,
  327. help="The path to the `clang` executable. "
  328. "By default uses the llvm-bin directory.")
  329. parser.add_argument('--creduce', dest='creduce', type=str,
  330. help="The path to the `creduce` executable. "
  331. "Required if `creduce` is not in PATH environment.")
  332. parser.add_argument('-v', '--verbose', action='store_true')
  333. args = parser.parse_args()
  334. verbose = args.verbose
  335. llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
  336. creduce_cmd = check_cmd('creduce', None, args.creduce)
  337. clang_cmd = check_cmd('clang', llvm_bin, args.clang)
  338. not_cmd = check_cmd('not', llvm_bin, args.llvm_not)
  339. crash_script = check_file(args.crash_script[0])
  340. file_to_reduce = check_file(args.file_to_reduce[0])
  341. r = Reduce(crash_script, file_to_reduce)
  342. r.simplify_clang_args()
  343. r.write_interestingness_test()
  344. r.clang_preprocess()
  345. r.run_creduce()
  346. r.reduce_clang_args()
  347. if __name__ == '__main__':
  348. main()