parser.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679
  1. # -*- coding: utf-8 -*-
  2. #
  3. # QAPI schema parser
  4. #
  5. # Copyright IBM, Corp. 2011
  6. # Copyright (c) 2013-2019 Red Hat Inc.
  7. #
  8. # Authors:
  9. # Anthony Liguori <aliguori@us.ibm.com>
  10. # Markus Armbruster <armbru@redhat.com>
  11. # Marc-André Lureau <marcandre.lureau@redhat.com>
  12. # Kevin Wolf <kwolf@redhat.com>
  13. #
  14. # This work is licensed under the terms of the GNU GPL, version 2.
  15. # See the COPYING file in the top-level directory.
  16. from collections import OrderedDict
  17. import os
  18. import re
  19. from .common import must_match
  20. from .error import QAPISemError, QAPISourceError
  21. from .source import QAPISourceInfo
  22. class QAPIParseError(QAPISourceError):
  23. """Error class for all QAPI schema parsing errors."""
  24. def __init__(self, parser, msg):
  25. col = 1
  26. for ch in parser.src[parser.line_pos:parser.pos]:
  27. if ch == '\t':
  28. col = (col + 7) % 8 + 1
  29. else:
  30. col += 1
  31. super().__init__(parser.info, msg, col)
  32. class QAPISchemaParser:
  33. def __init__(self, fname, previously_included=None, incl_info=None):
  34. self._fname = fname
  35. self._included = previously_included or set()
  36. self._included.add(os.path.abspath(self._fname))
  37. self.src = ''
  38. # Lexer state (see `accept` for details):
  39. self.info = QAPISourceInfo(self._fname, incl_info)
  40. self.tok = None
  41. self.pos = 0
  42. self.cursor = 0
  43. self.val = None
  44. self.line_pos = 0
  45. # Parser output:
  46. self.exprs = []
  47. self.docs = []
  48. # Showtime!
  49. self._parse()
  50. def _parse(self):
  51. cur_doc = None
  52. # May raise OSError; allow the caller to handle it.
  53. with open(self._fname, 'r', encoding='utf-8') as fp:
  54. self.src = fp.read()
  55. if self.src == '' or self.src[-1] != '\n':
  56. self.src += '\n'
  57. # Prime the lexer:
  58. self.accept()
  59. # Parse until done:
  60. while self.tok is not None:
  61. info = self.info
  62. if self.tok == '#':
  63. self.reject_expr_doc(cur_doc)
  64. for cur_doc in self.get_doc(info):
  65. self.docs.append(cur_doc)
  66. continue
  67. expr = self.get_expr()
  68. if not isinstance(expr, dict):
  69. raise QAPISemError(
  70. info, "top-level expression must be an object")
  71. if 'include' in expr:
  72. self.reject_expr_doc(cur_doc)
  73. if len(expr) != 1:
  74. raise QAPISemError(info, "invalid 'include' directive")
  75. include = expr['include']
  76. if not isinstance(include, str):
  77. raise QAPISemError(info,
  78. "value of 'include' must be a string")
  79. incl_fname = os.path.join(os.path.dirname(self._fname),
  80. include)
  81. self.exprs.append({'expr': {'include': incl_fname},
  82. 'info': info})
  83. exprs_include = self._include(include, info, incl_fname,
  84. self._included)
  85. if exprs_include:
  86. self.exprs.extend(exprs_include.exprs)
  87. self.docs.extend(exprs_include.docs)
  88. elif "pragma" in expr:
  89. self.reject_expr_doc(cur_doc)
  90. if len(expr) != 1:
  91. raise QAPISemError(info, "invalid 'pragma' directive")
  92. pragma = expr['pragma']
  93. if not isinstance(pragma, dict):
  94. raise QAPISemError(
  95. info, "value of 'pragma' must be an object")
  96. for name, value in pragma.items():
  97. self._pragma(name, value, info)
  98. else:
  99. expr_elem = {'expr': expr,
  100. 'info': info}
  101. if cur_doc:
  102. if not cur_doc.symbol:
  103. raise QAPISemError(
  104. cur_doc.info, "definition documentation required")
  105. expr_elem['doc'] = cur_doc
  106. self.exprs.append(expr_elem)
  107. cur_doc = None
  108. self.reject_expr_doc(cur_doc)
  109. @staticmethod
  110. def reject_expr_doc(doc):
  111. if doc and doc.symbol:
  112. raise QAPISemError(
  113. doc.info,
  114. "documentation for '%s' is not followed by the definition"
  115. % doc.symbol)
  116. @staticmethod
  117. def _include(include, info, incl_fname, previously_included):
  118. incl_abs_fname = os.path.abspath(incl_fname)
  119. # catch inclusion cycle
  120. inf = info
  121. while inf:
  122. if incl_abs_fname == os.path.abspath(inf.fname):
  123. raise QAPISemError(info, "inclusion loop for %s" % include)
  124. inf = inf.parent
  125. # skip multiple include of the same file
  126. if incl_abs_fname in previously_included:
  127. return None
  128. try:
  129. return QAPISchemaParser(incl_fname, previously_included, info)
  130. except OSError as err:
  131. raise QAPISemError(
  132. info,
  133. f"can't read include file '{incl_fname}': {err.strerror}"
  134. ) from err
  135. @staticmethod
  136. def _check_pragma_list_of_str(name, value, info):
  137. if (not isinstance(value, list)
  138. or any([not isinstance(elt, str) for elt in value])):
  139. raise QAPISemError(
  140. info,
  141. "pragma %s must be a list of strings" % name)
  142. def _pragma(self, name, value, info):
  143. if name == 'doc-required':
  144. if not isinstance(value, bool):
  145. raise QAPISemError(info,
  146. "pragma 'doc-required' must be boolean")
  147. info.pragma.doc_required = value
  148. elif name == 'command-name-exceptions':
  149. self._check_pragma_list_of_str(name, value, info)
  150. info.pragma.command_name_exceptions = value
  151. elif name == 'command-returns-exceptions':
  152. self._check_pragma_list_of_str(name, value, info)
  153. info.pragma.command_returns_exceptions = value
  154. elif name == 'member-name-exceptions':
  155. self._check_pragma_list_of_str(name, value, info)
  156. info.pragma.member_name_exceptions = value
  157. else:
  158. raise QAPISemError(info, "unknown pragma '%s'" % name)
  159. def accept(self, skip_comment=True):
  160. while True:
  161. self.tok = self.src[self.cursor]
  162. self.pos = self.cursor
  163. self.cursor += 1
  164. self.val = None
  165. if self.tok == '#':
  166. if self.src[self.cursor] == '#':
  167. # Start of doc comment
  168. skip_comment = False
  169. self.cursor = self.src.find('\n', self.cursor)
  170. if not skip_comment:
  171. self.val = self.src[self.pos:self.cursor]
  172. return
  173. elif self.tok in '{}:,[]':
  174. return
  175. elif self.tok == "'":
  176. # Note: we accept only printable ASCII
  177. string = ''
  178. esc = False
  179. while True:
  180. ch = self.src[self.cursor]
  181. self.cursor += 1
  182. if ch == '\n':
  183. raise QAPIParseError(self, "missing terminating \"'\"")
  184. if esc:
  185. # Note: we recognize only \\ because we have
  186. # no use for funny characters in strings
  187. if ch != '\\':
  188. raise QAPIParseError(self,
  189. "unknown escape \\%s" % ch)
  190. esc = False
  191. elif ch == '\\':
  192. esc = True
  193. continue
  194. elif ch == "'":
  195. self.val = string
  196. return
  197. if ord(ch) < 32 or ord(ch) >= 127:
  198. raise QAPIParseError(
  199. self, "funny character in string")
  200. string += ch
  201. elif self.src.startswith('true', self.pos):
  202. self.val = True
  203. self.cursor += 3
  204. return
  205. elif self.src.startswith('false', self.pos):
  206. self.val = False
  207. self.cursor += 4
  208. return
  209. elif self.tok == '\n':
  210. if self.cursor == len(self.src):
  211. self.tok = None
  212. return
  213. self.info = self.info.next_line()
  214. self.line_pos = self.cursor
  215. elif not self.tok.isspace():
  216. # Show up to next structural, whitespace or quote
  217. # character
  218. match = must_match('[^[\\]{}:,\\s\'"]+',
  219. self.src[self.cursor-1:])
  220. raise QAPIParseError(self, "stray '%s'" % match.group(0))
  221. def get_members(self):
  222. expr = OrderedDict()
  223. if self.tok == '}':
  224. self.accept()
  225. return expr
  226. if self.tok != "'":
  227. raise QAPIParseError(self, "expected string or '}'")
  228. while True:
  229. key = self.val
  230. assert isinstance(key, str) # Guaranteed by tok == "'"
  231. self.accept()
  232. if self.tok != ':':
  233. raise QAPIParseError(self, "expected ':'")
  234. self.accept()
  235. if key in expr:
  236. raise QAPIParseError(self, "duplicate key '%s'" % key)
  237. expr[key] = self.get_expr()
  238. if self.tok == '}':
  239. self.accept()
  240. return expr
  241. if self.tok != ',':
  242. raise QAPIParseError(self, "expected ',' or '}'")
  243. self.accept()
  244. if self.tok != "'":
  245. raise QAPIParseError(self, "expected string")
  246. def get_values(self):
  247. expr = []
  248. if self.tok == ']':
  249. self.accept()
  250. return expr
  251. if self.tok not in tuple("{['tf"):
  252. raise QAPIParseError(
  253. self, "expected '{', '[', ']', string, or boolean")
  254. while True:
  255. expr.append(self.get_expr())
  256. if self.tok == ']':
  257. self.accept()
  258. return expr
  259. if self.tok != ',':
  260. raise QAPIParseError(self, "expected ',' or ']'")
  261. self.accept()
  262. def get_expr(self):
  263. if self.tok == '{':
  264. self.accept()
  265. expr = self.get_members()
  266. elif self.tok == '[':
  267. self.accept()
  268. expr = self.get_values()
  269. elif self.tok in tuple("'tf"):
  270. assert isinstance(self.val, (str, bool))
  271. expr = self.val
  272. self.accept()
  273. else:
  274. raise QAPIParseError(
  275. self, "expected '{', '[', string, or boolean")
  276. return expr
  277. def get_doc(self, info):
  278. if self.val != '##':
  279. raise QAPIParseError(
  280. self, "junk after '##' at start of documentation comment")
  281. docs = []
  282. cur_doc = QAPIDoc(self, info)
  283. self.accept(False)
  284. while self.tok == '#':
  285. assert isinstance(self.val, str)
  286. if self.val.startswith('##'):
  287. # End of doc comment
  288. if self.val != '##':
  289. raise QAPIParseError(
  290. self,
  291. "junk after '##' at end of documentation comment")
  292. cur_doc.end_comment()
  293. docs.append(cur_doc)
  294. self.accept()
  295. return docs
  296. if self.val.startswith('# ='):
  297. if cur_doc.symbol:
  298. raise QAPIParseError(
  299. self,
  300. "unexpected '=' markup in definition documentation")
  301. if cur_doc.body.text:
  302. cur_doc.end_comment()
  303. docs.append(cur_doc)
  304. cur_doc = QAPIDoc(self, info)
  305. cur_doc.append(self.val)
  306. self.accept(False)
  307. raise QAPIParseError(self, "documentation comment must end with '##'")
  308. class QAPIDoc:
  309. """
  310. A documentation comment block, either definition or free-form
  311. Definition documentation blocks consist of
  312. * a body section: one line naming the definition, followed by an
  313. overview (any number of lines)
  314. * argument sections: a description of each argument (for commands
  315. and events) or member (for structs, unions and alternates)
  316. * features sections: a description of each feature flag
  317. * additional (non-argument) sections, possibly tagged
  318. Free-form documentation blocks consist only of a body section.
  319. """
  320. class Section:
  321. def __init__(self, parser, name=None, indent=0):
  322. # parser, for error messages about indentation
  323. self._parser = parser
  324. # optional section name (argument/member or section name)
  325. self.name = name
  326. self.text = ''
  327. # the expected indent level of the text of this section
  328. self._indent = indent
  329. def append(self, line):
  330. # Strip leading spaces corresponding to the expected indent level
  331. # Blank lines are always OK.
  332. if line:
  333. indent = must_match(r'\s*', line).end()
  334. if indent < self._indent:
  335. raise QAPIParseError(
  336. self._parser,
  337. "unexpected de-indent (expected at least %d spaces)" %
  338. self._indent)
  339. line = line[self._indent:]
  340. self.text += line.rstrip() + '\n'
  341. class ArgSection(Section):
  342. def __init__(self, parser, name, indent=0):
  343. super().__init__(parser, name, indent)
  344. self.member = None
  345. def connect(self, member):
  346. self.member = member
  347. def __init__(self, parser, info):
  348. # self._parser is used to report errors with QAPIParseError. The
  349. # resulting error position depends on the state of the parser.
  350. # It happens to be the beginning of the comment. More or less
  351. # servicable, but action at a distance.
  352. self._parser = parser
  353. self.info = info
  354. self.symbol = None
  355. self.body = QAPIDoc.Section(parser)
  356. # dict mapping parameter name to ArgSection
  357. self.args = OrderedDict()
  358. self.features = OrderedDict()
  359. # a list of Section
  360. self.sections = []
  361. # the current section
  362. self._section = self.body
  363. self._append_line = self._append_body_line
  364. def has_section(self, name):
  365. """Return True if we have a section with this name."""
  366. for i in self.sections:
  367. if i.name == name:
  368. return True
  369. return False
  370. def append(self, line):
  371. """
  372. Parse a comment line and add it to the documentation.
  373. The way that the line is dealt with depends on which part of
  374. the documentation we're parsing right now:
  375. * The body section: ._append_line is ._append_body_line
  376. * An argument section: ._append_line is ._append_args_line
  377. * A features section: ._append_line is ._append_features_line
  378. * An additional section: ._append_line is ._append_various_line
  379. """
  380. line = line[1:]
  381. if not line:
  382. self._append_freeform(line)
  383. return
  384. if line[0] != ' ':
  385. raise QAPIParseError(self._parser, "missing space after #")
  386. line = line[1:]
  387. self._append_line(line)
  388. def end_comment(self):
  389. self._end_section()
  390. @staticmethod
  391. def _is_section_tag(name):
  392. return name in ('Returns:', 'Since:',
  393. # those are often singular or plural
  394. 'Note:', 'Notes:',
  395. 'Example:', 'Examples:',
  396. 'TODO:')
  397. def _append_body_line(self, line):
  398. """
  399. Process a line of documentation text in the body section.
  400. If this a symbol line and it is the section's first line, this
  401. is a definition documentation block for that symbol.
  402. If it's a definition documentation block, another symbol line
  403. begins the argument section for the argument named by it, and
  404. a section tag begins an additional section. Start that
  405. section and append the line to it.
  406. Else, append the line to the current section.
  407. """
  408. name = line.split(' ', 1)[0]
  409. # FIXME not nice: things like '# @foo:' and '# @foo: ' aren't
  410. # recognized, and get silently treated as ordinary text
  411. if not self.symbol and not self.body.text and line.startswith('@'):
  412. if not line.endswith(':'):
  413. raise QAPIParseError(self._parser, "line should end with ':'")
  414. self.symbol = line[1:-1]
  415. # FIXME invalid names other than the empty string aren't flagged
  416. if not self.symbol:
  417. raise QAPIParseError(self._parser, "invalid name")
  418. elif self.symbol:
  419. # This is a definition documentation block
  420. if name.startswith('@') and name.endswith(':'):
  421. self._append_line = self._append_args_line
  422. self._append_args_line(line)
  423. elif line == 'Features:':
  424. self._append_line = self._append_features_line
  425. elif self._is_section_tag(name):
  426. self._append_line = self._append_various_line
  427. self._append_various_line(line)
  428. else:
  429. self._append_freeform(line)
  430. else:
  431. # This is a free-form documentation block
  432. self._append_freeform(line)
  433. def _append_args_line(self, line):
  434. """
  435. Process a line of documentation text in an argument section.
  436. A symbol line begins the next argument section, a section tag
  437. section or a non-indented line after a blank line begins an
  438. additional section. Start that section and append the line to
  439. it.
  440. Else, append the line to the current section.
  441. """
  442. name = line.split(' ', 1)[0]
  443. if name.startswith('@') and name.endswith(':'):
  444. # If line is "@arg: first line of description", find
  445. # the index of 'f', which is the indent we expect for any
  446. # following lines. We then remove the leading "@arg:"
  447. # from line and replace it with spaces so that 'f' has the
  448. # same index as it did in the original line and can be
  449. # handled the same way we will handle following lines.
  450. indent = must_match(r'@\S*:\s*', line).end()
  451. line = line[indent:]
  452. if not line:
  453. # Line was just the "@arg:" header; following lines
  454. # are not indented
  455. indent = 0
  456. else:
  457. line = ' ' * indent + line
  458. self._start_args_section(name[1:-1], indent)
  459. elif self._is_section_tag(name):
  460. self._append_line = self._append_various_line
  461. self._append_various_line(line)
  462. return
  463. elif (self._section.text.endswith('\n\n')
  464. and line and not line[0].isspace()):
  465. if line == 'Features:':
  466. self._append_line = self._append_features_line
  467. else:
  468. self._start_section()
  469. self._append_line = self._append_various_line
  470. self._append_various_line(line)
  471. return
  472. self._append_freeform(line)
  473. def _append_features_line(self, line):
  474. name = line.split(' ', 1)[0]
  475. if name.startswith('@') and name.endswith(':'):
  476. # If line is "@arg: first line of description", find
  477. # the index of 'f', which is the indent we expect for any
  478. # following lines. We then remove the leading "@arg:"
  479. # from line and replace it with spaces so that 'f' has the
  480. # same index as it did in the original line and can be
  481. # handled the same way we will handle following lines.
  482. indent = must_match(r'@\S*:\s*', line).end()
  483. line = line[indent:]
  484. if not line:
  485. # Line was just the "@arg:" header; following lines
  486. # are not indented
  487. indent = 0
  488. else:
  489. line = ' ' * indent + line
  490. self._start_features_section(name[1:-1], indent)
  491. elif self._is_section_tag(name):
  492. self._append_line = self._append_various_line
  493. self._append_various_line(line)
  494. return
  495. elif (self._section.text.endswith('\n\n')
  496. and line and not line[0].isspace()):
  497. self._start_section()
  498. self._append_line = self._append_various_line
  499. self._append_various_line(line)
  500. return
  501. self._append_freeform(line)
  502. def _append_various_line(self, line):
  503. """
  504. Process a line of documentation text in an additional section.
  505. A symbol line is an error.
  506. A section tag begins an additional section. Start that
  507. section and append the line to it.
  508. Else, append the line to the current section.
  509. """
  510. name = line.split(' ', 1)[0]
  511. if name.startswith('@') and name.endswith(':'):
  512. raise QAPIParseError(self._parser,
  513. "'%s' can't follow '%s' section"
  514. % (name, self.sections[0].name))
  515. if self._is_section_tag(name):
  516. # If line is "Section: first line of description", find
  517. # the index of 'f', which is the indent we expect for any
  518. # following lines. We then remove the leading "Section:"
  519. # from line and replace it with spaces so that 'f' has the
  520. # same index as it did in the original line and can be
  521. # handled the same way we will handle following lines.
  522. indent = must_match(r'\S*:\s*', line).end()
  523. line = line[indent:]
  524. if not line:
  525. # Line was just the "Section:" header; following lines
  526. # are not indented
  527. indent = 0
  528. else:
  529. line = ' ' * indent + line
  530. self._start_section(name[:-1], indent)
  531. self._append_freeform(line)
  532. def _start_symbol_section(self, symbols_dict, name, indent):
  533. # FIXME invalid names other than the empty string aren't flagged
  534. if not name:
  535. raise QAPIParseError(self._parser, "invalid parameter name")
  536. if name in symbols_dict:
  537. raise QAPIParseError(self._parser,
  538. "'%s' parameter name duplicated" % name)
  539. assert not self.sections
  540. self._end_section()
  541. self._section = QAPIDoc.ArgSection(self._parser, name, indent)
  542. symbols_dict[name] = self._section
  543. def _start_args_section(self, name, indent):
  544. self._start_symbol_section(self.args, name, indent)
  545. def _start_features_section(self, name, indent):
  546. self._start_symbol_section(self.features, name, indent)
  547. def _start_section(self, name=None, indent=0):
  548. if name in ('Returns', 'Since') and self.has_section(name):
  549. raise QAPIParseError(self._parser,
  550. "duplicated '%s' section" % name)
  551. self._end_section()
  552. self._section = QAPIDoc.Section(self._parser, name, indent)
  553. self.sections.append(self._section)
  554. def _end_section(self):
  555. if self._section:
  556. text = self._section.text = self._section.text.strip()
  557. if self._section.name and (not text or text.isspace()):
  558. raise QAPIParseError(
  559. self._parser,
  560. "empty doc section '%s'" % self._section.name)
  561. self._section = None
  562. def _append_freeform(self, line):
  563. match = re.match(r'(@\S+:)', line)
  564. if match:
  565. raise QAPIParseError(self._parser,
  566. "'%s' not allowed in free-form documentation"
  567. % match.group(1))
  568. self._section.append(line)
  569. def connect_member(self, member):
  570. if member.name not in self.args:
  571. # Undocumented TODO outlaw
  572. self.args[member.name] = QAPIDoc.ArgSection(self._parser,
  573. member.name)
  574. self.args[member.name].connect(member)
  575. def connect_feature(self, feature):
  576. if feature.name not in self.features:
  577. raise QAPISemError(feature.info,
  578. "feature '%s' lacks documentation"
  579. % feature.name)
  580. self.features[feature.name].connect(feature)
  581. def check_expr(self, expr):
  582. if self.has_section('Returns') and 'command' not in expr:
  583. raise QAPISemError(self.info,
  584. "'Returns:' is only valid for commands")
  585. def check(self):
  586. def check_args_section(args, info, what):
  587. bogus = [name for name, section in args.items()
  588. if not section.member]
  589. if bogus:
  590. raise QAPISemError(
  591. self.info,
  592. "documented member%s '%s' %s not exist"
  593. % ("s" if len(bogus) > 1 else "",
  594. "', '".join(bogus),
  595. "do" if len(bogus) > 1 else "does"))
  596. check_args_section(self.args, self.info, 'members')
  597. check_args_section(self.features, self.info, 'features')