# demangle_tree.py
  1. # Given a path to llvm-objdump and a directory tree, spider the directory tree
  2. # dumping every object file encountered with correct options needed to demangle
  3. # symbols in the object file, and collect statistics about failed / crashed
  4. # demanglings. Useful for stress testing the demangler against a large corpus
  5. # of inputs.
  6. from __future__ import print_function
  7. import argparse
  8. import functools
  9. import os
  10. import re
  11. import sys
  12. import subprocess
  13. import traceback
  14. from multiprocessing import Pool
  15. import multiprocessing
  16. args = None
  17. def parse_line(line):
  18. question = line.find('?')
  19. if question == -1:
  20. return None, None
  21. open_paren = line.find('(', question)
  22. if open_paren == -1:
  23. return None, None
  24. close_paren = line.rfind(')', open_paren)
  25. if open_paren == -1:
  26. return None, None
  27. mangled = line[question : open_paren]
  28. demangled = line[open_paren+1 : close_paren]
  29. return mangled.strip(), demangled.strip()
  30. class Result(object):
  31. def __init__(self):
  32. self.crashed = []
  33. self.file = None
  34. self.nsymbols = 0
  35. self.errors = set()
  36. self.nfiles = 0
  37. class MapContext(object):
  38. def __init__(self):
  39. self.rincomplete = None
  40. self.rcumulative = Result()
  41. self.pending_objs = []
  42. self.npending = 0
  43. def process_file(path, objdump):
  44. r = Result()
  45. r.file = path
  46. popen_args = [objdump, '-t', '-demangle', path]
  47. p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  48. stdout, stderr = p.communicate()
  49. if p.returncode != 0:
  50. r.crashed = [r.file]
  51. return r
  52. output = stdout.decode('utf-8')
  53. for line in output.splitlines():
  54. mangled, demangled = parse_line(line)
  55. if mangled is None:
  56. continue
  57. r.nsymbols += 1
  58. if "invalid mangled name" in demangled:
  59. r.errors.add(mangled)
  60. return r
  61. def add_results(r1, r2):
  62. r1.crashed.extend(r2.crashed)
  63. r1.errors.update(r2.errors)
  64. r1.nsymbols += r2.nsymbols
  65. r1.nfiles += r2.nfiles
  66. def print_result_row(directory, result):
  67. print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
  68. result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))
  69. def process_one_chunk(pool, chunk_size, objdump, context):
  70. objs = []
  71. incomplete = False
  72. dir_results = {}
  73. ordered_dirs = []
  74. while context.npending > 0 and len(objs) < chunk_size:
  75. this_dir = context.pending_objs[0][0]
  76. ordered_dirs.append(this_dir)
  77. re = Result()
  78. if context.rincomplete is not None:
  79. re = context.rincomplete
  80. context.rincomplete = None
  81. dir_results[this_dir] = re
  82. re.file = this_dir
  83. nneeded = chunk_size - len(objs)
  84. objs_this_dir = context.pending_objs[0][1]
  85. navail = len(objs_this_dir)
  86. ntaken = min(nneeded, navail)
  87. objs.extend(objs_this_dir[0:ntaken])
  88. remaining_objs_this_dir = objs_this_dir[ntaken:]
  89. context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
  90. context.npending -= ntaken
  91. if ntaken == navail:
  92. context.pending_objs.pop(0)
  93. else:
  94. incomplete = True
  95. re.nfiles += ntaken
  96. assert(len(objs) == chunk_size or context.npending == 0)
  97. copier = functools.partial(process_file, objdump=objdump)
  98. mapped_results = list(pool.map(copier, objs))
  99. for mr in mapped_results:
  100. result_dir = os.path.dirname(mr.file)
  101. result_entry = dir_results[result_dir]
  102. add_results(result_entry, mr)
  103. # It's only possible that a single item is incomplete, and it has to be the
  104. # last item.
  105. if incomplete:
  106. context.rincomplete = dir_results[ordered_dirs[-1]]
  107. ordered_dirs.pop()
  108. # Now ordered_dirs contains a list of all directories which *did* complete.
  109. for c in ordered_dirs:
  110. re = dir_results[c]
  111. add_results(context.rcumulative, re)
  112. print_result_row(c, re)
  113. def process_pending_files(pool, chunk_size, objdump, context):
  114. while context.npending >= chunk_size:
  115. process_one_chunk(pool, chunk_size, objdump, context)
  116. def go():
  117. global args
  118. obj_dir = args.dir
  119. extensions = args.extensions.split(',')
  120. extensions = [x if x[0] == '.' else '.' + x for x in extensions]
  121. pool_size = 48
  122. pool = Pool(processes=pool_size)
  123. try:
  124. nfiles = 0
  125. context = MapContext()
  126. for root, dirs, files in os.walk(obj_dir):
  127. root = os.path.normpath(root)
  128. pending = []
  129. for f in files:
  130. file, ext = os.path.splitext(f)
  131. if not ext in extensions:
  132. continue
  133. nfiles += 1
  134. full_path = os.path.join(root, f)
  135. full_path = os.path.normpath(full_path)
  136. pending.append(full_path)
  137. # If this directory had no object files, just print a default
  138. # status line and continue with the next dir
  139. if len(pending) == 0:
  140. print_result_row(root, Result())
  141. continue
  142. context.npending += len(pending)
  143. context.pending_objs.append((root, pending))
  144. # Drain the tasks, `pool_size` at a time, until we have less than
  145. # `pool_size` tasks remaining.
  146. process_pending_files(pool, pool_size, args.objdump, context)
  147. assert(context.npending < pool_size);
  148. process_one_chunk(pool, pool_size, args.objdump, context)
  149. total = context.rcumulative
  150. nfailed = len(total.errors)
  151. nsuccess = total.nsymbols - nfailed
  152. ncrashed = len(total.crashed)
  153. if (nfailed > 0):
  154. print("Failures:")
  155. for m in sorted(total.errors):
  156. print(" " + m)
  157. if (ncrashed > 0):
  158. print("Crashes:")
  159. for f in sorted(total.crashed):
  160. print(" " + f)
  161. print("Summary:")
  162. spct = float(nsuccess)/float(total.nsymbols)
  163. fpct = float(nfailed)/float(total.nsymbols)
  164. cpct = float(ncrashed)/float(nfiles)
  165. print("Processed {0} object files.".format(nfiles))
  166. print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
  167. print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
  168. print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
  169. except:
  170. traceback.print_exc()
  171. pool.close()
  172. pool.join()
  173. if __name__ == "__main__":
  174. def_obj = 'obj' if sys.platform == 'win32' else 'o'
  175. parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
  176. parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
  177. parser.add_argument('--objdump', type=str, help='path to llvm-objdump. If not specified ' +
  178. 'the tool is located as if by `which llvm-objdump`.')
  179. parser.add_argument('--extensions', type=str, default=def_obj,
  180. help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
  181. 'By default this will be `obj` on Windows and `o` otherwise.')
  182. args = parser.parse_args()
  183. multiprocessing.freeze_support()
  184. go()