dump_ast_matchers.py

#!/usr/bin/env python
# A tool to parse ASTMatchers.h and update the documentation in
# ../LibASTMatchersReference.html automatically. Run from the
# directory in which this file is located to update the docs.

import collections
import re
import urllib2

MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'
# Each matcher is documented in one row of the form:
#   result | name | argA
# The subsequent row contains the documentation and is hidden by default,
# becoming visible via javascript when the user clicks the matcher name.
TD_TEMPLATE = """
<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>
<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>
"""
# We categorize the matchers into these three categories in the reference:
node_matchers = {}
narrowing_matchers = {}
traversal_matchers = {}

# We output multiple rows per matcher if the matcher can be used on multiple
# node types. Thus, we need a new id per row to control the documentation
# pop-up. ids[name] keeps track of those ids.
ids = collections.defaultdict(int)

# Cache for doxygen urls we have already verified.
doxygen_probes = {}
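
# esc() below additionally turns each 'Matcher<Foo>' occurrence into a link to
# the doxygen page for clang::Foo when that page exists; probe results are
# cached in doxygen_probes so every class name is only checked once.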
def esc(text):
  """Escape any html in the given text."""
  text = re.sub(r'&', '&amp;', text)
  text = re.sub(r'<', '&lt;', text)
  text = re.sub(r'>', '&gt;', text)
  def link_if_exists(m):
    name = m.group(1)
    url = 'http://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
    if url not in doxygen_probes:
      try:
        print 'Probing %s...' % url
        urllib2.urlopen(url)
        doxygen_probes[url] = True
      except:
        doxygen_probes[url] = False
    if doxygen_probes[url]:
      return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
    else:
      return m.group(0)
  text = re.sub(
      r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)
  return text
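
# extract_result_types() parses the optional 'Usable as:' annotation. For
# example, a matcher comment ending in
#   Usable as: Matcher<Decl>, Matcher<Stmt>
# yields ['Stmt', 'Decl'] (types are collected from the end of the comment
# backwards), while a comment ending in 'Usable as: Any Matcher' yields ['*'].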
def extract_result_types(comment):
  """Extracts a list of result types from the given comment.

     We allow annotations in the comment of the matcher to specify what
     nodes a matcher can match on. Those comments have the form:
       Usable as: Any Matcher | (Matcher<T1>[, Matcher<t2>[, ...]])

     Returns ['*'] in case of 'Any Matcher', or ['T1', 'T2', ...].
     Returns the empty list if the 'Usable as' specification is empty, and
     None if no 'Usable as' specification could be parsed.
  """
  result_types = []
  m = re.search(r'Usable as: Any Matcher[\s\n]*$', comment, re.S)
  if m:
    return ['*']
  while True:
    m = re.match(r'^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$', comment, re.S)
    if not m:
      if re.search(r'Usable as:\s*$', comment):
        return result_types
      else:
        return None
    result_types += [m.group(2)]
    comment = m.group(1)

def strip_doxygen(comment):
  """Returns the given comment without \-escaped words."""
  # If there is only a doxygen keyword in the line, delete the whole line.
  comment = re.sub(r'^\\[^\s]+\n', r'', comment, flags=re.M)
  # Delete the doxygen command and the following whitespace.
  comment = re.sub(r'\\[^\s]+\s+', r'', comment)
  return comment
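
# unify_arguments() cleans up argument lists for display; for example,
# 'const internal::Matcher<Decl> &InnerMatcher' becomes
# 'Matcher<Decl>  InnerMatcher' (the '&' is replaced by a space), and
# standalone macro parameter names such as 'M' or 'M1' become 'Matcher<*>'.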
def unify_arguments(args):
  """Gets rid of anything the user doesn't care about in the argument list."""
  args = re.sub(r'internal::', r'', args)
  args = re.sub(r'const\s+', r'', args)
  args = re.sub(r'&', r' ', args)
  args = re.sub(r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2', args)
  return args

def add_matcher(result_type, name, args, comment, is_dyncast=False):
  """Adds a matcher to one of our categories."""
  if name == 'id':
    # FIXME: Figure out whether we want to support the 'id' matcher.
    return
  matcher_id = '%s%d' % (name, ids[name])
  ids[name] += 1
  args = unify_arguments(args)
  matcher_html = TD_TEMPLATE % {
    'result': esc('Matcher<%s>' % result_type),
    'name': name,
    'args': esc(args),
    'comment': esc(strip_doxygen(comment)),
    'id': matcher_id,
  }
  if is_dyncast:
    node_matchers[result_type + name] = matcher_html
  # Use a heuristic to figure out whether a matcher is a narrowing or
  # traversal matcher. By default, matchers that take other matchers as
  # arguments (and are not node matchers) do traversal. We specifically
  # exclude known narrowing matchers that also take other matchers as
  # arguments.
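  # For example, hasName() takes a string and therefore narrows, while
  # hasDescendant() takes another matcher and therefore traverses; allOf(),
  # anyOf() and unless() take matchers but are treated as narrowing, per the
  # explicit list below.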
  elif ('Matcher<' not in args or
        name in ['allOf', 'anyOf', 'anything', 'unless']):
    narrowing_matchers[result_type + name + esc(args)] = matcher_html
  else:
    traversal_matchers[result_type + name + esc(args)] = matcher_html

def act_on_decl(declaration, comment, allowed_types):
  """Parse the matcher out of the given declaration and comment.

     If 'allowed_types' is set, it contains a list of node types the matcher
     can match on, as extracted from the static type asserts in the matcher
     definition.
  """
  if declaration.strip():
    # Node matchers are defined by writing:
    #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
    m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
                       \s*([^\s,]+)\s*(?:,
                       \s*([^\s>]+)\s*)?>
                       \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
    if m:
      result, inner, name = m.groups()
      if not inner:
        inner = result
      add_matcher(result, name, 'Matcher<%s>...' % inner,
                  comment, is_dyncast=True)
      return

    # Parse the various matcher definition macros.
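    # Each branch below recognizes one family of AST_* macros from
    # ASTMatchers.h: a _P/_P2 suffix adds one or two (type, name) parameter
    # pairs, and an _OVERLOAD suffix appends a numeric overload id that is
    # ignored here; the captured pairs are re-joined into a readable
    # 'Type Name' argument list before the matcher is added.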
    m = re.match(""".*AST_TYPE_MATCHER\(
                       \s*([^\s,]+\s*),
                       \s*([^\s,]+\s*)
                     \)\s*;\s*$""", declaration, flags=re.X)
    if m:
      inner, name = m.groups()
      add_matcher('Type', name, 'Matcher<%s>...' % inner,
                  comment, is_dyncast=True)
      # FIXME: re-enable once we have implemented casting on the TypeLoc
      # hierarchy.
      # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
      #             comment, is_dyncast=True)
      return

    m = re.match(""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\(
                       \s*([^\s,]+\s*),
                       \s*(?:[^\s,]+\s*),
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES_([^(]*)\(([^)]*)\)
                     \)\s*;\s*$""", declaration, flags=re.X)
    if m:
      loc, name, n_results, results = m.groups()[0:4]
      result_types = [r.strip() for r in results.split(',')]
      comment_result_types = extract_result_types(comment)
      if (comment_result_types and
          sorted(result_types) != sorted(comment_result_types)):
        raise Exception('Inconsistent documentation for: %s' % name)
      for result_type in result_types:
        add_matcher(result_type, name, 'Matcher<Type>', comment)
        if loc:
          add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>',
                      comment)
      return

    m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       \s*([^\s,]+)\s*,
                       \s*AST_POLYMORPHIC_SUPPORTED_TYPES_([^(]*)\(([^)]*)\)
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                     \)\s*{\s*$""", declaration, flags=re.X)
    if m:
      p, n, name, n_results, results = m.groups()[0:5]
      args = m.groups()[5:]
      result_types = [r.strip() for r in results.split(',')]
      if allowed_types and allowed_types != result_types:
        raise Exception('Inconsistent documentation for: %s' % name)
      if n not in ['', '2']:
        raise Exception('Cannot parse "%s"' % declaration)
      args = ', '.join('%s %s' % (args[i], args[i+1])
                       for i in range(0, len(args), 2) if args[i])
      for result_type in result_types:
        add_matcher(result_type, name, args, comment)
      return

    m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                       \s*([^\s,]+)\s*
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                     \)\s*{\s*$""", declaration, flags=re.X)
    if m:
      p, n, result, name = m.groups()[0:4]
      args = m.groups()[4:]
      if n not in ['', '2']:
        raise Exception('Cannot parse "%s"' % declaration)
      args = ', '.join('%s %s' % (args[i], args[i+1])
                       for i in range(0, len(args), 2) if args[i])
      add_matcher(result, name, args, comment)
      return

    m = re.match(r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                       (?:\s*([^\s,]+)\s*,)?
                       \s*([^\s,]+)\s*
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*([^\s,]+)\s*
                          ,\s*([^\s,]+)\s*)?
                       (?:,\s*\d+\s*)?
                     \)\s*{\s*$""", declaration, flags=re.X)
    if m:
      p, n, result, name = m.groups()[0:4]
      args = m.groups()[4:]
      if not result:
        if not allowed_types:
          raise Exception('Did not find allowed result types for: %s' % name)
        result_types = allowed_types
      else:
        result_types = [result]
      if n not in ['', '2']:
        raise Exception('Cannot parse "%s"' % declaration)
      args = ', '.join('%s %s' % (args[i], args[i+1])
                       for i in range(0, len(args), 2) if args[i])
      for result_type in result_types:
        add_matcher(result_type, name, args, comment)
      return

    # Parse ArgumentAdapting matchers.
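    # ArgumentAdaptingMatcherFunc instances (e.g. has, hasDescendant, forEach)
    # accept a matcher of any node type and are usable on any node, hence the
    # '*' result and argument types recorded for them.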
    m = re.match(
        r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*(?:LLVM_ATTRIBUTE_UNUSED\s*)
              ([a-zA-Z]*)\s*=\s*{};$""",
        declaration, flags=re.X)
    if m:
      name = m.groups()[0]
      add_matcher('*', name, 'Matcher<*>', comment)
      return

    # Parse Variadic operator matchers.
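    # A maximum argument count of 1 corresponds to unary operators such as
    # unless, while UINT_MAX corresponds to truly variadic operators such as
    # allOf and anyOf.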
    m = re.match(
        r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s>]+)\s*>\s*
              ([a-zA-Z]*)\s*=\s*{.*};$""",
        declaration, flags=re.X)
    if m:
      min_args, max_args, name = m.groups()[:3]
      if max_args == '1':
        add_matcher('*', name, 'Matcher<*>', comment)
        return
      elif max_args == 'UINT_MAX':
        add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
        return

    # Parse free standing matcher functions, like:
    #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
    m = re.match(r"""^\s*(.*)\s+
                     ([^\s\(]+)\s*\(
                     (.*)
                     \)\s*{""", declaration, re.X)
    if m:
      result, name, args = m.groups()
      args = ', '.join(p.strip() for p in args.split(','))
      m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
      if m:
        result_types = [m.group(2)]
      else:
        result_types = extract_result_types(comment)
      if not result_types:
        if not comment:
          # Only overloads don't have their own doxygen comments; ignore those.
          print 'Ignoring "%s"' % name
        else:
          print 'Cannot determine result type for "%s"' % name
      else:
        for result_type in result_types:
          add_matcher(result_type, name, args, comment)
    else:
      print '*** Unparsable: "' + declaration + '" ***'

def sort_table(matcher_type, matcher_map):
  """Returns the sorted html table for the given row map."""
  table = ''
  for key in sorted(matcher_map.keys()):
    table += matcher_map[key] + '\n'
  return ('<!-- START_%(type)s_MATCHERS -->\n' +
          '%(table)s' +
          '<!--END_%(type)s_MATCHERS -->') % {
    'type': matcher_type,
    'table': table,
  }

# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
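# In declaration mode, '//' lines are accumulated as comment text and all
# other lines are appended to the current declaration, which is flushed when
# it ends in ';' or in an opening '{' (the latter switches to body mode so
# that is_base_of static asserts can be collected into allowed_types).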
comment = ''
declaration = ''
allowed_types = []
body = False
for line in open(MATCHERS_FILE).read().splitlines():
  if body:
    if line.strip() and line[0] == '}':
      if declaration:
        act_on_decl(declaration, comment, allowed_types)
        comment = ''
        declaration = ''
        allowed_types = []
      body = False
    else:
      m = re.search(r'is_base_of<([^,]+), NodeType>', line)
      if m and m.group(1):
        allowed_types += [m.group(1)]
    continue
  if line.strip() and line.lstrip()[0] == '/':
    comment += re.sub(r'/+\s?', '', line) + '\n'
  else:
    declaration += ' ' + line
    if ((not line.strip()) or
        line.rstrip()[-1] == ';' or
        (line.rstrip()[-1] == '{' and line.rstrip()[-3:] != '= {')):
      if line.strip() and line.rstrip()[-1] == '{':
        body = True
      else:
        act_on_decl(declaration, comment, allowed_types)
        comment = ''
        declaration = ''
        allowed_types = []
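
# Render the three categories into html tables and splice each one between
# its START_/END_ marker comments in the reference page.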
node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

reference = open('../LibASTMatchersReference.html').read()
reference = re.sub(r'<!-- START_DECL_MATCHERS.*END_DECL_MATCHERS -->',
                   '%s', reference, flags=re.S) % node_matcher_table
reference = re.sub(r'<!-- START_NARROWING_MATCHERS.*END_NARROWING_MATCHERS -->',
                   '%s', reference, flags=re.S) % narrowing_matcher_table
reference = re.sub(r'<!-- START_TRAVERSAL_MATCHERS.*END_TRAVERSAL_MATCHERS -->',
                   '%s', reference, flags=re.S) % traversal_matcher_table

with open('../LibASTMatchersReference.html', 'w') as output:
  output.write(reference)