extract.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. # -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
  2. #===----------------------------------------------------------------------===##
  3. #
  4. # The LLVM Compiler Infrastructure
  5. #
  6. # This file is dual licensed under the MIT and the University of Illinois Open
  7. # Source Licenses. See LICENSE.TXT for details.
  8. #
  9. #===----------------------------------------------------------------------===##
  10. """
  11. extract - A set of functions that extract symbol lists from shared libraries.
  12. """
  13. import distutils.spawn
  14. import sys
  15. import re
  16. import libcxx.util
  17. from libcxx.sym_check import util
# Symbol names that are always dropped from extraction results; consulted by
# NMExtractor._want_sym and ReadElfExtractor.process_syms.
extract_ignore_names = ['_init', '_fini']
  19. class NMExtractor(object):
  20. """
  21. NMExtractor - Extract symbol lists from libraries using nm.
  22. """
  23. @staticmethod
  24. def find_tool():
  25. """
  26. Search for the nm executable and return the path.
  27. """
  28. return distutils.spawn.find_executable('nm')
  29. def __init__(self):
  30. """
  31. Initialize the nm executable and flags that will be used to extract
  32. symbols from shared libraries.
  33. """
  34. self.nm_exe = self.find_tool()
  35. if self.nm_exe is None:
  36. # ERROR no NM found
  37. print("ERROR: Could not find nm")
  38. sys.exit(1)
  39. self.flags = ['-P', '-g']
  40. def extract(self, lib):
  41. """
  42. Extract symbols from a library and return the results as a dict of
  43. parsed symbols.
  44. """
  45. cmd = [self.nm_exe] + self.flags + [lib]
  46. out, _, exit_code = util.executeCommandVerbose(cmd)
  47. if exit_code != 0:
  48. raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
  49. fmt_syms = (self._extract_sym(l)
  50. for l in out.splitlines() if l.strip())
  51. # Cast symbol to string.
  52. final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
  53. # Make unique and sort strings.
  54. tmp_list = list(sorted(set(final_syms)))
  55. # Cast string back to symbol.
  56. return util.read_syms_from_list(tmp_list)
  57. def _extract_sym(self, sym_str):
  58. bits = sym_str.split()
  59. # Everything we want has at least two columns.
  60. if len(bits) < 2:
  61. return None
  62. new_sym = {
  63. 'name': bits[0],
  64. 'type': bits[1],
  65. 'is_defined': (bits[1].lower() != 'u')
  66. }
  67. new_sym['name'] = new_sym['name'].replace('@@', '@')
  68. new_sym = self._transform_sym_type(new_sym)
  69. # NM types which we want to save the size for.
  70. if new_sym['type'] == 'OBJECT' and len(bits) > 3:
  71. new_sym['size'] = int(bits[3], 16)
  72. return new_sym
  73. @staticmethod
  74. def _want_sym(sym):
  75. """
  76. Check that s is a valid symbol that we want to keep.
  77. """
  78. if sym is None or len(sym) < 2:
  79. return False
  80. if sym['name'] in extract_ignore_names:
  81. return False
  82. bad_types = ['t', 'b', 'r', 'd', 'w']
  83. return (sym['type'] not in bad_types
  84. and sym['name'] not in ['__bss_start', '_end', '_edata'])
  85. @staticmethod
  86. def _transform_sym_type(sym):
  87. """
  88. Map the nm single letter output for type to either FUNC or OBJECT.
  89. If the type is not recognized it is left unchanged.
  90. """
  91. func_types = ['T', 'W']
  92. obj_types = ['B', 'D', 'R', 'V', 'S']
  93. if sym['type'] in func_types:
  94. sym['type'] = 'FUNC'
  95. elif sym['type'] in obj_types:
  96. sym['type'] = 'OBJECT'
  97. return sym
  98. class ReadElfExtractor(object):
  99. """
  100. ReadElfExtractor - Extract symbol lists from libraries using readelf.
  101. """
  102. @staticmethod
  103. def find_tool():
  104. """
  105. Search for the readelf executable and return the path.
  106. """
  107. return distutils.spawn.find_executable('readelf')
  108. def __init__(self):
  109. """
  110. Initialize the readelf executable and flags that will be used to
  111. extract symbols from shared libraries.
  112. """
  113. self.tool = self.find_tool()
  114. if self.tool is None:
  115. # ERROR no NM found
  116. print("ERROR: Could not find readelf")
  117. sys.exit(1)
  118. self.flags = ['--wide', '--symbols']
  119. def extract(self, lib):
  120. """
  121. Extract symbols from a library and return the results as a dict of
  122. parsed symbols.
  123. """
  124. cmd = [self.tool] + self.flags + [lib]
  125. out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
  126. if exit_code != 0:
  127. raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
  128. dyn_syms = self.get_dynsym_table(out)
  129. return self.process_syms(dyn_syms)
  130. def process_syms(self, sym_list):
  131. new_syms = []
  132. for s in sym_list:
  133. parts = s.split()
  134. if not parts:
  135. continue
  136. assert len(parts) == 7 or len(parts) == 8 or len(parts) == 9
  137. if len(parts) == 7:
  138. continue
  139. new_sym = {
  140. 'name': parts[7],
  141. 'size': int(parts[2]),
  142. 'type': parts[3],
  143. 'is_defined': (parts[6] != 'UND')
  144. }
  145. assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE']
  146. if new_sym['name'] in extract_ignore_names:
  147. continue
  148. if new_sym['type'] == 'NOTYPE':
  149. continue
  150. if new_sym['type'] == 'FUNC':
  151. del new_sym['size']
  152. new_syms += [new_sym]
  153. return new_syms
  154. def get_dynsym_table(self, out):
  155. lines = out.splitlines()
  156. start = -1
  157. end = -1
  158. for i in range(len(lines)):
  159. if lines[i].startswith("Symbol table '.dynsym'"):
  160. start = i + 2
  161. if start != -1 and end == -1 and not lines[i].strip():
  162. end = i + 1
  163. assert start != -1
  164. if end == -1:
  165. end = len(lines)
  166. return lines[start:end]
  167. def extract_symbols(lib_file):
  168. """
  169. Extract and return a list of symbols extracted from a dynamic library.
  170. The symbols are extracted using NM. They are then filtered and formated.
  171. Finally they symbols are made unique.
  172. """
  173. if ReadElfExtractor.find_tool():
  174. extractor = ReadElfExtractor()
  175. else:
  176. extractor = NMExtractor()
  177. return extractor.extract(lib_file)