123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- # Given a path to llvm-objdump and a directory tree, spider the directory tree
- # dumping every object file encountered with correct options needed to demangle
- # symbols in the object file, and collect statistics about failed / crashed
- # demanglings. Useful for stress testing the demangler against a large corpus
- # of inputs.
- from __future__ import print_function
- import argparse
- import functools
- import os
- import re
- import sys
- import subprocess
- import traceback
- from multiprocessing import Pool
- import multiprocessing
# Parsed command-line arguments. Assigned in the `__main__` block from
# `parser.parse_args()` and read by `go()`; stays None until then.
args = None
def parse_line(line):
    """Split one line of objdump output into (mangled, demangled).

    The mangled name is taken to start at the first '?' on the line and run
    up to the first '(' that follows it.  The demangled text is whatever sits
    between that '(' and the last ')' on the line.

    Returns:
        A `(mangled, demangled)` tuple of whitespace-stripped strings, or
        `(None, None)` if the line has no '?', no '(' after it, or no
        closing ')' after that '('.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None

    # Use the *last* ')' because the demangled text may itself contain
    # parentheses (e.g. a parameter list).
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        # Without this check the slice below would use -1 as its end index
        # and silently drop the final character of the line.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()
class Result(object):
    """Accumulated demangling statistics for a single file or a directory."""

    def __init__(self):
        # Paths of object files for which objdump exited with a failure.
        self.crashed = list()
        # Path this result describes (an object file or a directory).
        self.file = None
        # Count of symbols seen.
        self.nsymbols = 0
        # Distinct mangled names that could not be demangled.
        self.errors = set()
        # Count of object files folded into this result.
        self.nfiles = 0
class MapContext(object):
    """Mutable bookkeeping shared across successive chunk-processing calls."""

    def __init__(self):
        # Queue of (directory, [object paths]) tuples still to be processed,
        # together with the total number of object paths it holds.
        self.pending_objs = list()
        self.npending = 0
        # Running totals over every directory that has been fully processed.
        self.rcumulative = Result()
        # Partial Result of a directory that straddles a chunk boundary, or
        # None when no directory is currently half-processed.
        self.rincomplete = None
def process_file(path, objdump):
    """Dump the symbol table of one object file and tally demangling failures.

    Runs `objdump -t -demangle path` and scans every output line with
    `parse_line`.

    Args:
        path: path of the object file to dump.
        objdump: path (or name) of the llvm-objdump executable.

    Returns:
        A `Result` whose `file` is `path`.  If objdump exits with a non-zero
        status the file is recorded in `crashed` and no symbols are counted;
        otherwise `nsymbols` counts the parsed symbols and `errors` holds the
        mangled names whose demangled text contains "invalid mangled name".
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Decode leniently: a single undecodable byte in the dump must not raise
    # UnicodeDecodeError, which would propagate out of the worker and abort
    # the whole pool.map() call.
    output = stdout.decode('utf-8', 'replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r
def add_results(r1, r2):
    """Fold the statistics of `r2` into `r1` in place; `r2` is left untouched."""
    # Plain counters.
    r1.nfiles += r2.nfiles
    r1.nsymbols += r2.nsymbols
    # Collections: crashed files are appended, failed names are unioned.
    r1.crashed += r2.crashed
    r1.errors.update(r2.errors)
def print_result_row(directory, result):
    """Print a one-line summary of `result` labelled with `directory`."""
    row_template = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'"
    ncrashes = len(result.crashed)
    nerrors = len(result.errors)
    row = row_template.format(
        result.nfiles, ncrashes, nerrors, result.nsymbols, directory)
    print(row)
def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files in parallel.

    Object files are taken from the front of `context.pending_objs`, one
    directory at a time, until the chunk is full or nothing is pending.  The
    files are dumped via `pool.map(process_file, ...)` and each file's result
    is folded into the `Result` of the directory it was queued under.  Every
    directory that was fully consumed is added to `context.rcumulative` and
    printed; a directory that was only partly consumed has its partial result
    parked in `context.rincomplete` and is picked up again by the next call.

    Args:
        pool: object providing `map(func, iterable)` (a multiprocessing Pool).
        chunk_size: maximum number of object files to process in this call.
        objdump: path (or name) of the llvm-objdump executable.
        context: `MapContext` holding the pending queue and running totals;
            mutated in place.
    """
    objs = []
    # obj_dirs[i] is the directory under which objs[i] was queued.  Tracking
    # it explicitly avoids re-deriving it with os.path.dirname(), which does
    # not round-trip for a normalised root of '.' (dirname('a.o') is '', not
    # '.') and would therefore miss the dir_results key.
    obj_dirs = []
    incomplete = False
    dir_results = {}
    ordered_dirs = []

    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)

        # Resume the partial result left over from the previous chunk, if
        # any; it always belongs to the directory at the front of the queue.
        dir_result = Result()
        if context.rincomplete is not None:
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)

        context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    # pool.map returns results in the same order as `objs`, so they line up
    # with `obj_dirs`.
    mapped_results = pool.map(copier, objs)

    for owner_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[owner_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be
    # the last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did*
    # complete.
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)
def process_pending_files(pool, chunk_size, objdump, context):
    """Run full chunks until fewer than `chunk_size` files remain pending."""
    while True:
        # Stop as soon as a complete chunk can no longer be formed.
        if context.npending < chunk_size:
            return
        process_one_chunk(pool, chunk_size, objdump, context)
def go():
    """Walk the directory tree, demangle every object file, print a report.

    Reads the module-level `args` (`dir`, `extensions`, `objdump`).  For each
    directory under `args.dir`, files whose extension is in the requested
    list are queued and dumped in parallel chunks; a status row is printed
    per directory, followed by the failed symbols, the crashed files and an
    overall summary.

    Any exception raised during the run is printed as a traceback rather than
    propagated; the worker pool is closed and joined in every case.
    """
    global args

    obj_dir = args.dir

    # Normalise each requested extension to a leading-dot form.  Empty
    # entries (e.g. from a trailing comma) are dropped instead of raising
    # IndexError on `x[0]`.
    requested = (e.strip() for e in args.extensions.split(','))
    extensions = [x if x.startswith('.') else '.' + x for x in requested if x]

    # When --objdump is not given, fall back to the bare program name so that
    # the executable is looked up on PATH, as the option's help text
    # promises.
    objdump = args.objdump or 'llvm-objdump'

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.normpath(os.path.join(root, f))
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, objdump, context)

        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")

        # An empty tree (no matching files, or no symbols) must still yield a
        # summary rather than a ZeroDivisionError.
        spct = float(nsuccess) / total.nsymbols if total.nsymbols else 0.0
        fpct = float(nfailed) / total.nsymbols if total.nsymbols else 0.0
        cpct = float(ncrashed) / nfiles if nfiles else 0.0

        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
    except Exception:
        traceback.print_exc()
    finally:
        # Always release the worker processes, whether the run succeeded,
        # failed, or was interrupted.
        pool.close()
        pool.join()
if __name__ == "__main__":
    # freeze_support() has to run straight after the __main__ guard: in a
    # frozen Windows executable the child processes re-enter here with
    # multiprocessing's own command-line arguments, which argparse would
    # reject if it ran first.
    multiprocessing.freeze_support()

    # Default object-file extension depends on the host platform.
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    # Stored in the module-level `args`, which go() reads.
    args = parser.parse_args()

    go()
|