# ShUtil.py -- lexer and parser for lit's internal shell command language.
  1. from __future__ import absolute_import
  2. import itertools
  3. import lit.util
  4. from lit.ShCommands import Command, Pipeline, Seq
  5. class ShLexer:
  6. def __init__(self, data, win32Escapes = False):
  7. self.data = data
  8. self.pos = 0
  9. self.end = len(data)
  10. self.win32Escapes = win32Escapes
  11. def eat(self):
  12. c = self.data[self.pos]
  13. self.pos += 1
  14. return c
  15. def look(self):
  16. return self.data[self.pos]
  17. def maybe_eat(self, c):
  18. """
  19. maybe_eat(c) - Consume the character c if it is the next character,
  20. returning True if a character was consumed. """
  21. if self.data[self.pos] == c:
  22. self.pos += 1
  23. return True
  24. return False
  25. def lex_arg_fast(self, c):
  26. # Get the leading whitespace free section.
  27. chunk = self.data[self.pos - 1:].split(None, 1)[0]
  28. # If it has special characters, the fast path failed.
  29. if ('|' in chunk or '&' in chunk or
  30. '<' in chunk or '>' in chunk or
  31. "'" in chunk or '"' in chunk or
  32. ';' in chunk or '\\' in chunk):
  33. return None
  34. self.pos = self.pos - 1 + len(chunk)
  35. return chunk
  36. def lex_arg_slow(self, c):
  37. if c in "'\"":
  38. str = self.lex_arg_quoted(c)
  39. else:
  40. str = c
  41. while self.pos != self.end:
  42. c = self.look()
  43. if c.isspace() or c in "|&;":
  44. break
  45. elif c in '><':
  46. # This is an annoying case; we treat '2>' as a single token so
  47. # we don't have to track whitespace tokens.
  48. # If the parse string isn't an integer, do the usual thing.
  49. if not str.isdigit():
  50. break
  51. # Otherwise, lex the operator and convert to a redirection
  52. # token.
  53. num = int(str)
  54. tok = self.lex_one_token()
  55. assert isinstance(tok, tuple) and len(tok) == 1
  56. return (tok[0], num)
  57. elif c == '"':
  58. self.eat()
  59. str += self.lex_arg_quoted('"')
  60. elif c == "'":
  61. self.eat()
  62. str += self.lex_arg_quoted("'")
  63. elif not self.win32Escapes and c == '\\':
  64. # Outside of a string, '\\' escapes everything.
  65. self.eat()
  66. if self.pos == self.end:
  67. lit.util.warning(
  68. "escape at end of quoted argument in: %r" % self.data)
  69. return str
  70. str += self.eat()
  71. else:
  72. str += self.eat()
  73. return str
  74. def lex_arg_quoted(self, delim):
  75. str = ''
  76. while self.pos != self.end:
  77. c = self.eat()
  78. if c == delim:
  79. return str
  80. elif c == '\\' and delim == '"':
  81. # Inside a '"' quoted string, '\\' only escapes the quote
  82. # character and backslash, otherwise it is preserved.
  83. if self.pos == self.end:
  84. lit.util.warning(
  85. "escape at end of quoted argument in: %r" % self.data)
  86. return str
  87. c = self.eat()
  88. if c == '"': #
  89. str += '"'
  90. elif c == '\\':
  91. str += '\\'
  92. else:
  93. str += '\\' + c
  94. else:
  95. str += c
  96. lit.util.warning("missing quote character in %r" % self.data)
  97. return str
  98. def lex_arg_checked(self, c):
  99. pos = self.pos
  100. res = self.lex_arg_fast(c)
  101. end = self.pos
  102. self.pos = pos
  103. reference = self.lex_arg_slow(c)
  104. if res is not None:
  105. if res != reference:
  106. raise ValueError("Fast path failure: %r != %r" % (
  107. res, reference))
  108. if self.pos != end:
  109. raise ValueError("Fast path failure: %r != %r" % (
  110. self.pos, end))
  111. return reference
  112. def lex_arg(self, c):
  113. return self.lex_arg_fast(c) or self.lex_arg_slow(c)
  114. def lex_one_token(self):
  115. """
  116. lex_one_token - Lex a single 'sh' token. """
  117. c = self.eat()
  118. if c == ';':
  119. return (c,)
  120. if c == '|':
  121. if self.maybe_eat('|'):
  122. return ('||',)
  123. return (c,)
  124. if c == '&':
  125. if self.maybe_eat('&'):
  126. return ('&&',)
  127. if self.maybe_eat('>'):
  128. return ('&>',)
  129. return (c,)
  130. if c == '>':
  131. if self.maybe_eat('&'):
  132. return ('>&',)
  133. if self.maybe_eat('>'):
  134. return ('>>',)
  135. return (c,)
  136. if c == '<':
  137. if self.maybe_eat('&'):
  138. return ('<&',)
  139. if self.maybe_eat('>'):
  140. return ('<<',)
  141. return (c,)
  142. return self.lex_arg(c)
  143. def lex(self):
  144. while self.pos != self.end:
  145. if self.look().isspace():
  146. self.eat()
  147. else:
  148. yield self.lex_one_token()
  149. ###
  150. class ShParser:
  151. def __init__(self, data, win32Escapes = False, pipefail = False):
  152. self.data = data
  153. self.pipefail = pipefail
  154. self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
  155. def lex(self):
  156. for item in self.tokens:
  157. return item
  158. return None
  159. def look(self):
  160. token = self.lex()
  161. if token is not None:
  162. self.tokens = itertools.chain([token], self.tokens)
  163. return token
  164. def parse_command(self):
  165. tok = self.lex()
  166. if not tok:
  167. raise ValueError("empty command!")
  168. if isinstance(tok, tuple):
  169. raise ValueError("syntax error near unexpected token %r" % tok[0])
  170. args = [tok]
  171. redirects = []
  172. while 1:
  173. tok = self.look()
  174. # EOF?
  175. if tok is None:
  176. break
  177. # If this is an argument, just add it to the current command.
  178. if isinstance(tok, str):
  179. args.append(self.lex())
  180. continue
  181. # Otherwise see if it is a terminator.
  182. assert isinstance(tok, tuple)
  183. if tok[0] in ('|',';','&','||','&&'):
  184. break
  185. # Otherwise it must be a redirection.
  186. op = self.lex()
  187. arg = self.lex()
  188. if not arg:
  189. raise ValueError("syntax error near token %r" % op[0])
  190. redirects.append((op, arg))
  191. return Command(args, redirects)
  192. def parse_pipeline(self):
  193. negate = False
  194. commands = [self.parse_command()]
  195. while self.look() == ('|',):
  196. self.lex()
  197. commands.append(self.parse_command())
  198. return Pipeline(commands, negate, self.pipefail)
  199. def parse(self):
  200. lhs = self.parse_pipeline()
  201. while self.look():
  202. operator = self.lex()
  203. assert isinstance(operator, tuple) and len(operator) == 1
  204. if not self.look():
  205. raise ValueError(
  206. "missing argument to operator %r" % operator[0])
  207. # FIXME: Operator precedence!!
  208. lhs = Seq(lhs, operator[0], self.parse_pipeline())
  209. return lhs