decodetree.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2018 Linaro Limited
  3. #
  4. # This library is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU Lesser General Public
  6. # License as published by the Free Software Foundation; either
  7. # version 2 of the License, or (at your option) any later version.
  8. #
  9. # This library is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # Lesser General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Lesser General Public
  15. # License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16. #
  17. #
  18. # Generate a decoding tree from a specification file.
  19. # See the syntax and semantics in docs/devel/decodetree.rst.
  20. #
  21. import os
  22. import re
  23. import sys
  24. import getopt
  25. insnwidth = 32
  26. insnmask = 0xffffffff
  27. variablewidth = False
  28. fields = {}
  29. arguments = {}
  30. formats = {}
  31. allpatterns = []
  32. anyextern = False
  33. translate_prefix = 'trans'
  34. translate_scope = 'static '
  35. input_file = ''
  36. output_file = None
  37. output_fd = None
  38. insntype = 'uint32_t'
  39. decode_function = 'decode'
  40. # An identifier for C.
  41. re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
  42. # Identifiers for Arguments, Fields, Formats and Patterns.
  43. re_arg_ident = '&[a-zA-Z0-9_]*'
  44. re_fld_ident = '%[a-zA-Z0-9_]*'
  45. re_fmt_ident = '@[a-zA-Z0-9_]*'
  46. re_pat_ident = '[a-zA-Z0-9_]*'
  47. def error_with_file(file, lineno, *args):
  48. """Print an error message from file:line and args and exit."""
  49. global output_file
  50. global output_fd
  51. prefix = ''
  52. if file:
  53. prefix += '{0}:'.format(file)
  54. if lineno:
  55. prefix += '{0}:'.format(lineno)
  56. if prefix:
  57. prefix += ' '
  58. print(prefix, end='error: ', file=sys.stderr)
  59. print(*args, file=sys.stderr)
  60. if output_file and output_fd:
  61. output_fd.close()
  62. os.remove(output_file)
  63. exit(1)
  64. # end error_with_file
  65. def error(lineno, *args):
  66. error_with_file(input_file, lineno, *args)
  67. # end error
  68. def output(*args):
  69. global output_fd
  70. for a in args:
  71. output_fd.write(a)
  72. def output_autogen():
  73. output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
  74. def str_indent(c):
  75. """Return a string with C spaces"""
  76. return ' ' * c
  77. def str_fields(fields):
  78. """Return a string uniquely identifying FIELDS"""
  79. r = ''
  80. for n in sorted(fields.keys()):
  81. r += '_' + n
  82. return r[1:]
  83. def str_match_bits(bits, mask):
  84. """Return a string pretty-printing BITS/MASK"""
  85. global insnwidth
  86. i = 1 << (insnwidth - 1)
  87. space = 0x01010100
  88. r = ''
  89. while i != 0:
  90. if i & mask:
  91. if i & bits:
  92. r += '1'
  93. else:
  94. r += '0'
  95. else:
  96. r += '.'
  97. if i & space:
  98. r += ' '
  99. i >>= 1
  100. return r
  101. def is_pow2(x):
  102. """Return true iff X is equal to a power of 2."""
  103. return (x & (x - 1)) == 0
  104. def ctz(x):
  105. """Return the number of times 2 factors into X."""
  106. assert x != 0
  107. r = 0
  108. while ((x >> r) & 1) == 0:
  109. r += 1
  110. return r
  111. def is_contiguous(bits):
  112. if bits == 0:
  113. return -1
  114. shift = ctz(bits)
  115. if is_pow2((bits >> shift) + 1):
  116. return shift
  117. else:
  118. return -1
  119. def eq_fields_for_args(flds_a, flds_b):
  120. if len(flds_a) != len(flds_b):
  121. return False
  122. for k, a in flds_a.items():
  123. if k not in flds_b:
  124. return False
  125. return True
  126. def eq_fields_for_fmts(flds_a, flds_b):
  127. if len(flds_a) != len(flds_b):
  128. return False
  129. for k, a in flds_a.items():
  130. if k not in flds_b:
  131. return False
  132. b = flds_b[k]
  133. if a.__class__ != b.__class__ or a != b:
  134. return False
  135. return True
  136. class Field:
  137. """Class representing a simple instruction field"""
  138. def __init__(self, sign, pos, len):
  139. self.sign = sign
  140. self.pos = pos
  141. self.len = len
  142. self.mask = ((1 << len) - 1) << pos
  143. def __str__(self):
  144. if self.sign:
  145. s = 's'
  146. else:
  147. s = ''
  148. return str(self.pos) + ':' + s + str(self.len)
  149. def str_extract(self):
  150. if self.sign:
  151. extr = 'sextract32'
  152. else:
  153. extr = 'extract32'
  154. return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
  155. def __eq__(self, other):
  156. return self.sign == other.sign and self.mask == other.mask
  157. def __ne__(self, other):
  158. return not self.__eq__(other)
  159. # end Field
  160. class MultiField:
  161. """Class representing a compound instruction field"""
  162. def __init__(self, subs, mask):
  163. self.subs = subs
  164. self.sign = subs[0].sign
  165. self.mask = mask
  166. def __str__(self):
  167. return str(self.subs)
  168. def str_extract(self):
  169. ret = '0'
  170. pos = 0
  171. for f in reversed(self.subs):
  172. if pos == 0:
  173. ret = f.str_extract()
  174. else:
  175. ret = 'deposit32({0}, {1}, {2}, {3})' \
  176. .format(ret, pos, 32 - pos, f.str_extract())
  177. pos += f.len
  178. return ret
  179. def __ne__(self, other):
  180. if len(self.subs) != len(other.subs):
  181. return True
  182. for a, b in zip(self.subs, other.subs):
  183. if a.__class__ != b.__class__ or a != b:
  184. return True
  185. return False
  186. def __eq__(self, other):
  187. return not self.__ne__(other)
  188. # end MultiField
  189. class ConstField:
  190. """Class representing an argument field with constant value"""
  191. def __init__(self, value):
  192. self.value = value
  193. self.mask = 0
  194. self.sign = value < 0
  195. def __str__(self):
  196. return str(self.value)
  197. def str_extract(self):
  198. return str(self.value)
  199. def __cmp__(self, other):
  200. return self.value - other.value
  201. # end ConstField
  202. class FunctionField:
  203. """Class representing a field passed through a function"""
  204. def __init__(self, func, base):
  205. self.mask = base.mask
  206. self.sign = base.sign
  207. self.base = base
  208. self.func = func
  209. def __str__(self):
  210. return self.func + '(' + str(self.base) + ')'
  211. def str_extract(self):
  212. return self.func + '(ctx, ' + self.base.str_extract() + ')'
  213. def __eq__(self, other):
  214. return self.func == other.func and self.base == other.base
  215. def __ne__(self, other):
  216. return not self.__eq__(other)
  217. # end FunctionField
  218. class ParameterField:
  219. """Class representing a pseudo-field read from a function"""
  220. def __init__(self, func):
  221. self.mask = 0
  222. self.sign = 0
  223. self.func = func
  224. def __str__(self):
  225. return self.func
  226. def str_extract(self):
  227. return self.func + '(ctx)'
  228. def __eq__(self, other):
  229. return self.func == other.func
  230. def __ne__(self, other):
  231. return not self.__eq__(other)
  232. # end ParameterField
  233. class Arguments:
  234. """Class representing the extracted fields of a format"""
  235. def __init__(self, nm, flds, extern):
  236. self.name = nm
  237. self.extern = extern
  238. self.fields = sorted(flds)
  239. def __str__(self):
  240. return self.name + ' ' + str(self.fields)
  241. def struct_name(self):
  242. return 'arg_' + self.name
  243. def output_def(self):
  244. if not self.extern:
  245. output('typedef struct {\n')
  246. for n in self.fields:
  247. output(' int ', n, ';\n')
  248. output('} ', self.struct_name(), ';\n\n')
  249. # end Arguments
  250. class General:
  251. """Common code between instruction formats and instruction patterns"""
  252. def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
  253. self.name = name
  254. self.file = input_file
  255. self.lineno = lineno
  256. self.base = base
  257. self.fixedbits = fixb
  258. self.fixedmask = fixm
  259. self.undefmask = udfm
  260. self.fieldmask = fldm
  261. self.fields = flds
  262. self.width = w
  263. def __str__(self):
  264. return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  265. def str1(self, i):
  266. return str_indent(i) + self.__str__()
  267. # end General
  268. class Format(General):
  269. """Class representing an instruction format"""
  270. def extract_name(self):
  271. global decode_function
  272. return decode_function + '_extract_' + self.name
  273. def output_extract(self):
  274. output('static void ', self.extract_name(), '(DisasContext *ctx, ',
  275. self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
  276. for n, f in self.fields.items():
  277. output(' a->', n, ' = ', f.str_extract(), ';\n')
  278. output('}\n\n')
  279. # end Format
  280. class Pattern(General):
  281. """Class representing an instruction pattern"""
  282. def output_decl(self):
  283. global translate_scope
  284. global translate_prefix
  285. output('typedef ', self.base.base.struct_name(),
  286. ' arg_', self.name, ';\n')
  287. output(translate_scope, 'bool ', translate_prefix, '_', self.name,
  288. '(DisasContext *ctx, arg_', self.name, ' *a);\n')
  289. def output_code(self, i, extracted, outerbits, outermask):
  290. global translate_prefix
  291. ind = str_indent(i)
  292. arg = self.base.base.name
  293. output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
  294. if not extracted:
  295. output(ind, self.base.extract_name(),
  296. '(ctx, &u.f_', arg, ', insn);\n')
  297. for n, f in self.fields.items():
  298. output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
  299. output(ind, 'if (', translate_prefix, '_', self.name,
  300. '(ctx, &u.f_', arg, ')) return true;\n')
  301. # Normal patterns do not have children.
  302. def build_tree(self):
  303. return
  304. def prop_masks(self):
  305. return
  306. def prop_format(self):
  307. return
  308. def prop_width(self):
  309. return
  310. # end Pattern
  311. class MultiPattern(General):
  312. """Class representing a set of instruction patterns"""
  313. def __init__(self, lineno):
  314. self.file = input_file
  315. self.lineno = lineno
  316. self.pats = []
  317. self.base = None
  318. self.fixedbits = 0
  319. self.fixedmask = 0
  320. self.undefmask = 0
  321. self.width = None
  322. def __str__(self):
  323. r = 'group'
  324. if self.fixedbits is not None:
  325. r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  326. return r
  327. def output_decl(self):
  328. for p in self.pats:
  329. p.output_decl()
  330. def prop_masks(self):
  331. global insnmask
  332. fixedmask = insnmask
  333. undefmask = insnmask
  334. # Collect fixedmask/undefmask for all of the children.
  335. for p in self.pats:
  336. p.prop_masks()
  337. fixedmask &= p.fixedmask
  338. undefmask &= p.undefmask
  339. # Widen fixedmask until all fixedbits match
  340. repeat = True
  341. fixedbits = 0
  342. while repeat and fixedmask != 0:
  343. fixedbits = None
  344. for p in self.pats:
  345. thisbits = p.fixedbits & fixedmask
  346. if fixedbits is None:
  347. fixedbits = thisbits
  348. elif fixedbits != thisbits:
  349. fixedmask &= ~(fixedbits ^ thisbits)
  350. break
  351. else:
  352. repeat = False
  353. self.fixedbits = fixedbits
  354. self.fixedmask = fixedmask
  355. self.undefmask = undefmask
  356. def build_tree(self):
  357. for p in self.pats:
  358. p.build_tree()
  359. def prop_format(self):
  360. for p in self.pats:
  361. p.build_tree()
  362. def prop_width(self):
  363. width = None
  364. for p in self.pats:
  365. p.prop_width()
  366. if width is None:
  367. width = p.width
  368. elif width != p.width:
  369. error_with_file(self.file, self.lineno,
  370. 'width mismatch in patterns within braces')
  371. self.width = width
  372. # end MultiPattern
  373. class IncMultiPattern(MultiPattern):
  374. """Class representing an overlapping set of instruction patterns"""
  375. def output_code(self, i, extracted, outerbits, outermask):
  376. global translate_prefix
  377. ind = str_indent(i)
  378. for p in self.pats:
  379. if outermask != p.fixedmask:
  380. innermask = p.fixedmask & ~outermask
  381. innerbits = p.fixedbits & ~outermask
  382. output(ind, 'if ((insn & ',
  383. '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
  384. ') {\n')
  385. output(ind, ' /* ',
  386. str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
  387. p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
  388. output(ind, '}\n')
  389. else:
  390. p.output_code(i, extracted, p.fixedbits, p.fixedmask)
  391. #end IncMultiPattern
  392. class Tree:
  393. """Class representing a node in a decode tree"""
  394. def __init__(self, fm, tm):
  395. self.fixedmask = fm
  396. self.thismask = tm
  397. self.subs = []
  398. self.base = None
  399. def str1(self, i):
  400. ind = str_indent(i)
  401. r = '{0}{1:08x}'.format(ind, self.fixedmask)
  402. if self.format:
  403. r += ' ' + self.format.name
  404. r += ' [\n'
  405. for (b, s) in self.subs:
  406. r += '{0} {1:08x}:\n'.format(ind, b)
  407. r += s.str1(i + 4) + '\n'
  408. r += ind + ']'
  409. return r
  410. def __str__(self):
  411. return self.str1(0)
  412. def output_code(self, i, extracted, outerbits, outermask):
  413. ind = str_indent(i)
  414. # If we identified all nodes below have the same format,
  415. # extract the fields now.
  416. if not extracted and self.base:
  417. output(ind, self.base.extract_name(),
  418. '(ctx, &u.f_', self.base.base.name, ', insn);\n')
  419. extracted = True
  420. # Attempt to aid the compiler in producing compact switch statements.
  421. # If the bits in the mask are contiguous, extract them.
  422. sh = is_contiguous(self.thismask)
  423. if sh > 0:
  424. # Propagate SH down into the local functions.
  425. def str_switch(b, sh=sh):
  426. return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
  427. def str_case(b, sh=sh):
  428. return '0x{0:x}'.format(b >> sh)
  429. else:
  430. def str_switch(b):
  431. return 'insn & 0x{0:08x}'.format(b)
  432. def str_case(b):
  433. return '0x{0:08x}'.format(b)
  434. output(ind, 'switch (', str_switch(self.thismask), ') {\n')
  435. for b, s in sorted(self.subs):
  436. assert (self.thismask & ~s.fixedmask) == 0
  437. innermask = outermask | self.thismask
  438. innerbits = outerbits | b
  439. output(ind, 'case ', str_case(b), ':\n')
  440. output(ind, ' /* ',
  441. str_match_bits(innerbits, innermask), ' */\n')
  442. s.output_code(i + 4, extracted, innerbits, innermask)
  443. output(ind, ' return false;\n')
  444. output(ind, '}\n')
  445. # end Tree
  446. class ExcMultiPattern(MultiPattern):
  447. """Class representing a non-overlapping set of instruction patterns"""
  448. def output_code(self, i, extracted, outerbits, outermask):
  449. # Defer everything to our decomposed Tree node
  450. self.tree.output_code(i, extracted, outerbits, outermask)
  451. @staticmethod
  452. def __build_tree(pats, outerbits, outermask):
  453. # Find the intersection of all remaining fixedmask.
  454. innermask = ~outermask & insnmask
  455. for i in pats:
  456. innermask &= i.fixedmask
  457. if innermask == 0:
  458. # Edge condition: One pattern covers the entire insnmask
  459. if len(pats) == 1:
  460. t = Tree(outermask, innermask)
  461. t.subs.append((0, pats[0]))
  462. return t
  463. text = 'overlapping patterns:'
  464. for p in pats:
  465. text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
  466. error_with_file(pats[0].file, pats[0].lineno, text)
  467. fullmask = outermask | innermask
  468. # Sort each element of pats into the bin selected by the mask.
  469. bins = {}
  470. for i in pats:
  471. fb = i.fixedbits & innermask
  472. if fb in bins:
  473. bins[fb].append(i)
  474. else:
  475. bins[fb] = [i]
  476. # We must recurse if any bin has more than one element or if
  477. # the single element in the bin has not been fully matched.
  478. t = Tree(fullmask, innermask)
  479. for b, l in bins.items():
  480. s = l[0]
  481. if len(l) > 1 or s.fixedmask & ~fullmask != 0:
  482. s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
  483. t.subs.append((b, s))
  484. return t
  485. def build_tree(self):
  486. super().prop_format()
  487. self.tree = self.__build_tree(self.pats, self.fixedbits,
  488. self.fixedmask)
  489. @staticmethod
  490. def __prop_format(tree):
  491. """Propagate Format objects into the decode tree"""
  492. # Depth first search.
  493. for (b, s) in tree.subs:
  494. if isinstance(s, Tree):
  495. ExcMultiPattern.__prop_format(s)
  496. # If all entries in SUBS have the same format, then
  497. # propagate that into the tree.
  498. f = None
  499. for (b, s) in tree.subs:
  500. if f is None:
  501. f = s.base
  502. if f is None:
  503. return
  504. if f is not s.base:
  505. return
  506. tree.base = f
  507. def prop_format(self):
  508. super().prop_format()
  509. self.__prop_format(self.tree)
  510. # end ExcMultiPattern
  511. def parse_field(lineno, name, toks):
  512. """Parse one instruction field from TOKS at LINENO"""
  513. global fields
  514. global insnwidth
  515. # A "simple" field will have only one entry;
  516. # a "multifield" will have several.
  517. subs = []
  518. width = 0
  519. func = None
  520. for t in toks:
  521. if re.match('^!function=', t):
  522. if func:
  523. error(lineno, 'duplicate function')
  524. func = t.split('=')
  525. func = func[1]
  526. continue
  527. if re.fullmatch('[0-9]+:s[0-9]+', t):
  528. # Signed field extract
  529. subtoks = t.split(':s')
  530. sign = True
  531. elif re.fullmatch('[0-9]+:[0-9]+', t):
  532. # Unsigned field extract
  533. subtoks = t.split(':')
  534. sign = False
  535. else:
  536. error(lineno, 'invalid field token "{0}"'.format(t))
  537. po = int(subtoks[0])
  538. le = int(subtoks[1])
  539. if po + le > insnwidth:
  540. error(lineno, 'field {0} too large'.format(t))
  541. f = Field(sign, po, le)
  542. subs.append(f)
  543. width += le
  544. if width > insnwidth:
  545. error(lineno, 'field too large')
  546. if len(subs) == 0:
  547. if func:
  548. f = ParameterField(func)
  549. else:
  550. error(lineno, 'field with no value')
  551. else:
  552. if len(subs) == 1:
  553. f = subs[0]
  554. else:
  555. mask = 0
  556. for s in subs:
  557. if mask & s.mask:
  558. error(lineno, 'field components overlap')
  559. mask |= s.mask
  560. f = MultiField(subs, mask)
  561. if func:
  562. f = FunctionField(func, f)
  563. if name in fields:
  564. error(lineno, 'duplicate field', name)
  565. fields[name] = f
  566. # end parse_field
  567. def parse_arguments(lineno, name, toks):
  568. """Parse one argument set from TOKS at LINENO"""
  569. global arguments
  570. global re_C_ident
  571. global anyextern
  572. flds = []
  573. extern = False
  574. for t in toks:
  575. if re.fullmatch('!extern', t):
  576. extern = True
  577. anyextern = True
  578. continue
  579. if not re.fullmatch(re_C_ident, t):
  580. error(lineno, 'invalid argument set token "{0}"'.format(t))
  581. if t in flds:
  582. error(lineno, 'duplicate argument "{0}"'.format(t))
  583. flds.append(t)
  584. if name in arguments:
  585. error(lineno, 'duplicate argument set', name)
  586. arguments[name] = Arguments(name, flds, extern)
  587. # end parse_arguments
  588. def lookup_field(lineno, name):
  589. global fields
  590. if name in fields:
  591. return fields[name]
  592. error(lineno, 'undefined field', name)
  593. def add_field(lineno, flds, new_name, f):
  594. if new_name in flds:
  595. error(lineno, 'duplicate field', new_name)
  596. flds[new_name] = f
  597. return flds
  598. def add_field_byname(lineno, flds, new_name, old_name):
  599. return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
  600. def infer_argument_set(flds):
  601. global arguments
  602. global decode_function
  603. for arg in arguments.values():
  604. if eq_fields_for_args(flds, arg.fields):
  605. return arg
  606. name = decode_function + str(len(arguments))
  607. arg = Arguments(name, flds.keys(), False)
  608. arguments[name] = arg
  609. return arg
  610. def infer_format(arg, fieldmask, flds, width):
  611. global arguments
  612. global formats
  613. global decode_function
  614. const_flds = {}
  615. var_flds = {}
  616. for n, c in flds.items():
  617. if c is ConstField:
  618. const_flds[n] = c
  619. else:
  620. var_flds[n] = c
  621. # Look for an existing format with the same argument set and fields
  622. for fmt in formats.values():
  623. if arg and fmt.base != arg:
  624. continue
  625. if fieldmask != fmt.fieldmask:
  626. continue
  627. if width != fmt.width:
  628. continue
  629. if not eq_fields_for_fmts(flds, fmt.fields):
  630. continue
  631. return (fmt, const_flds)
  632. name = decode_function + '_Fmt_' + str(len(formats))
  633. if not arg:
  634. arg = infer_argument_set(flds)
  635. fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
  636. formats[name] = fmt
  637. return (fmt, const_flds)
  638. # end infer_format
  639. def parse_generic(lineno, parent_pat, name, toks):
  640. """Parse one instruction format from TOKS at LINENO"""
  641. global fields
  642. global arguments
  643. global formats
  644. global allpatterns
  645. global re_arg_ident
  646. global re_fld_ident
  647. global re_fmt_ident
  648. global re_C_ident
  649. global insnwidth
  650. global insnmask
  651. global variablewidth
  652. is_format = parent_pat is None
  653. fixedmask = 0
  654. fixedbits = 0
  655. undefmask = 0
  656. width = 0
  657. flds = {}
  658. arg = None
  659. fmt = None
  660. for t in toks:
  661. # '&Foo' gives a format an explicit argument set.
  662. if re.fullmatch(re_arg_ident, t):
  663. tt = t[1:]
  664. if arg:
  665. error(lineno, 'multiple argument sets')
  666. if tt in arguments:
  667. arg = arguments[tt]
  668. else:
  669. error(lineno, 'undefined argument set', t)
  670. continue
  671. # '@Foo' gives a pattern an explicit format.
  672. if re.fullmatch(re_fmt_ident, t):
  673. tt = t[1:]
  674. if fmt:
  675. error(lineno, 'multiple formats')
  676. if tt in formats:
  677. fmt = formats[tt]
  678. else:
  679. error(lineno, 'undefined format', t)
  680. continue
  681. # '%Foo' imports a field.
  682. if re.fullmatch(re_fld_ident, t):
  683. tt = t[1:]
  684. flds = add_field_byname(lineno, flds, tt, tt)
  685. continue
  686. # 'Foo=%Bar' imports a field with a different name.
  687. if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
  688. (fname, iname) = t.split('=%')
  689. flds = add_field_byname(lineno, flds, fname, iname)
  690. continue
  691. # 'Foo=number' sets an argument field to a constant value
  692. if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
  693. (fname, value) = t.split('=')
  694. value = int(value)
  695. flds = add_field(lineno, flds, fname, ConstField(value))
  696. continue
  697. # Pattern of 0s, 1s, dots and dashes indicate required zeros,
  698. # required ones, or dont-cares.
  699. if re.fullmatch('[01.-]+', t):
  700. shift = len(t)
  701. fms = t.replace('0', '1')
  702. fms = fms.replace('.', '0')
  703. fms = fms.replace('-', '0')
  704. fbs = t.replace('.', '0')
  705. fbs = fbs.replace('-', '0')
  706. ubm = t.replace('1', '0')
  707. ubm = ubm.replace('.', '0')
  708. ubm = ubm.replace('-', '1')
  709. fms = int(fms, 2)
  710. fbs = int(fbs, 2)
  711. ubm = int(ubm, 2)
  712. fixedbits = (fixedbits << shift) | fbs
  713. fixedmask = (fixedmask << shift) | fms
  714. undefmask = (undefmask << shift) | ubm
  715. # Otherwise, fieldname:fieldwidth
  716. elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
  717. (fname, flen) = t.split(':')
  718. sign = False
  719. if flen[0] == 's':
  720. sign = True
  721. flen = flen[1:]
  722. shift = int(flen, 10)
  723. if shift + width > insnwidth:
  724. error(lineno, 'field {0} exceeds insnwidth'.format(fname))
  725. f = Field(sign, insnwidth - width - shift, shift)
  726. flds = add_field(lineno, flds, fname, f)
  727. fixedbits <<= shift
  728. fixedmask <<= shift
  729. undefmask <<= shift
  730. else:
  731. error(lineno, 'invalid token "{0}"'.format(t))
  732. width += shift
  733. if variablewidth and width < insnwidth and width % 8 == 0:
  734. shift = insnwidth - width
  735. fixedbits <<= shift
  736. fixedmask <<= shift
  737. undefmask <<= shift
  738. undefmask |= (1 << shift) - 1
  739. # We should have filled in all of the bits of the instruction.
  740. elif not (is_format and width == 0) and width != insnwidth:
  741. error(lineno, 'definition has {0} bits'.format(width))
  742. # Do not check for fields overlapping fields; one valid usage
  743. # is to be able to duplicate fields via import.
  744. fieldmask = 0
  745. for f in flds.values():
  746. fieldmask |= f.mask
  747. # Fix up what we've parsed to match either a format or a pattern.
  748. if is_format:
  749. # Formats cannot reference formats.
  750. if fmt:
  751. error(lineno, 'format referencing format')
  752. # If an argument set is given, then there should be no fields
  753. # without a place to store it.
  754. if arg:
  755. for f in flds.keys():
  756. if f not in arg.fields:
  757. error(lineno, 'field {0} not in argument set {1}'
  758. .format(f, arg.name))
  759. else:
  760. arg = infer_argument_set(flds)
  761. if name in formats:
  762. error(lineno, 'duplicate format name', name)
  763. fmt = Format(name, lineno, arg, fixedbits, fixedmask,
  764. undefmask, fieldmask, flds, width)
  765. formats[name] = fmt
  766. else:
  767. # Patterns can reference a format ...
  768. if fmt:
  769. # ... but not an argument simultaneously
  770. if arg:
  771. error(lineno, 'pattern specifies both format and argument set')
  772. if fixedmask & fmt.fixedmask:
  773. error(lineno, 'pattern fixed bits overlap format fixed bits')
  774. if width != fmt.width:
  775. error(lineno, 'pattern uses format of different width')
  776. fieldmask |= fmt.fieldmask
  777. fixedbits |= fmt.fixedbits
  778. fixedmask |= fmt.fixedmask
  779. undefmask |= fmt.undefmask
  780. else:
  781. (fmt, flds) = infer_format(arg, fieldmask, flds, width)
  782. arg = fmt.base
  783. for f in flds.keys():
  784. if f not in arg.fields:
  785. error(lineno, 'field {0} not in argument set {1}'
  786. .format(f, arg.name))
  787. if f in fmt.fields.keys():
  788. error(lineno, 'field {0} set by format and pattern'.format(f))
  789. for f in arg.fields:
  790. if f not in flds.keys() and f not in fmt.fields.keys():
  791. error(lineno, 'field {0} not initialized'.format(f))
  792. pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
  793. undefmask, fieldmask, flds, width)
  794. parent_pat.pats.append(pat)
  795. allpatterns.append(pat)
  796. # Validate the masks that we have assembled.
  797. if fieldmask & fixedmask:
  798. error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
  799. .format(fieldmask, fixedmask))
  800. if fieldmask & undefmask:
  801. error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
  802. .format(fieldmask, undefmask))
  803. if fixedmask & undefmask:
  804. error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
  805. .format(fixedmask, undefmask))
  806. if not is_format:
  807. allbits = fieldmask | fixedmask | undefmask
  808. if allbits != insnmask:
  809. error(lineno, 'bits left unspecified (0x{0:08x})'
  810. .format(allbits ^ insnmask))
  811. # end parse_general
  812. def parse_file(f, parent_pat):
  813. """Parse all of the patterns within a file"""
  814. global re_arg_ident
  815. global re_fld_ident
  816. global re_fmt_ident
  817. global re_pat_ident
  818. # Read all of the lines of the file. Concatenate lines
  819. # ending in backslash; discard empty lines and comments.
  820. toks = []
  821. lineno = 0
  822. nesting = 0
  823. nesting_pats = []
  824. for line in f:
  825. lineno += 1
  826. # Expand and strip spaces, to find indent.
  827. line = line.rstrip()
  828. line = line.expandtabs()
  829. len1 = len(line)
  830. line = line.lstrip()
  831. len2 = len(line)
  832. # Discard comments
  833. end = line.find('#')
  834. if end >= 0:
  835. line = line[:end]
  836. t = line.split()
  837. if len(toks) != 0:
  838. # Next line after continuation
  839. toks.extend(t)
  840. else:
  841. # Allow completely blank lines.
  842. if len1 == 0:
  843. continue
  844. indent = len1 - len2
  845. # Empty line due to comment.
  846. if len(t) == 0:
  847. # Indentation must be correct, even for comment lines.
  848. if indent != nesting:
  849. error(lineno, 'indentation ', indent, ' != ', nesting)
  850. continue
  851. start_lineno = lineno
  852. toks = t
  853. # Continuation?
  854. if toks[-1] == '\\':
  855. toks.pop()
  856. continue
  857. name = toks[0]
  858. del toks[0]
  859. # End nesting?
  860. if name == '}' or name == ']':
  861. if len(toks) != 0:
  862. error(start_lineno, 'extra tokens after close brace')
  863. # Make sure { } and [ ] nest properly.
  864. if (name == '}') != isinstance(parent_pat, IncMultiPattern):
  865. error(lineno, 'mismatched close brace')
  866. try:
  867. parent_pat = nesting_pats.pop()
  868. except:
  869. error(lineno, 'extra close brace')
  870. nesting -= 2
  871. if indent != nesting:
  872. error(lineno, 'indentation ', indent, ' != ', nesting)
  873. toks = []
  874. continue
  875. # Everything else should have current indentation.
  876. if indent != nesting:
  877. error(start_lineno, 'indentation ', indent, ' != ', nesting)
  878. # Start nesting?
  879. if name == '{' or name == '[':
  880. if len(toks) != 0:
  881. error(start_lineno, 'extra tokens after open brace')
  882. if name == '{':
  883. nested_pat = IncMultiPattern(start_lineno)
  884. else:
  885. nested_pat = ExcMultiPattern(start_lineno)
  886. parent_pat.pats.append(nested_pat)
  887. nesting_pats.append(parent_pat)
  888. parent_pat = nested_pat
  889. nesting += 2
  890. toks = []
  891. continue
  892. # Determine the type of object needing to be parsed.
  893. if re.fullmatch(re_fld_ident, name):
  894. parse_field(start_lineno, name[1:], toks)
  895. elif re.fullmatch(re_arg_ident, name):
  896. parse_arguments(start_lineno, name[1:], toks)
  897. elif re.fullmatch(re_fmt_ident, name):
  898. parse_generic(start_lineno, None, name[1:], toks)
  899. elif re.fullmatch(re_pat_ident, name):
  900. parse_generic(start_lineno, parent_pat, name, toks)
  901. else:
  902. error(lineno, 'invalid token "{0}"'.format(name))
  903. toks = []
  904. if nesting != 0:
  905. error(lineno, 'missing close brace')
  906. # end parse_file
  907. class SizeTree:
  908. """Class representing a node in a size decode tree"""
  909. def __init__(self, m, w):
  910. self.mask = m
  911. self.subs = []
  912. self.base = None
  913. self.width = w
  914. def str1(self, i):
  915. ind = str_indent(i)
  916. r = '{0}{1:08x}'.format(ind, self.mask)
  917. r += ' [\n'
  918. for (b, s) in self.subs:
  919. r += '{0} {1:08x}:\n'.format(ind, b)
  920. r += s.str1(i + 4) + '\n'
  921. r += ind + ']'
  922. return r
  923. def __str__(self):
  924. return self.str1(0)
  925. def output_code(self, i, extracted, outerbits, outermask):
  926. ind = str_indent(i)
  927. # If we need to load more bytes to test, do so now.
  928. if extracted < self.width:
  929. output(ind, 'insn = ', decode_function,
  930. '_load_bytes(ctx, insn, {0}, {1});\n'
  931. .format(extracted // 8, self.width // 8));
  932. extracted = self.width
  933. # Attempt to aid the compiler in producing compact switch statements.
  934. # If the bits in the mask are contiguous, extract them.
  935. sh = is_contiguous(self.mask)
  936. if sh > 0:
  937. # Propagate SH down into the local functions.
  938. def str_switch(b, sh=sh):
  939. return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
  940. def str_case(b, sh=sh):
  941. return '0x{0:x}'.format(b >> sh)
  942. else:
  943. def str_switch(b):
  944. return 'insn & 0x{0:08x}'.format(b)
  945. def str_case(b):
  946. return '0x{0:08x}'.format(b)
  947. output(ind, 'switch (', str_switch(self.mask), ') {\n')
  948. for b, s in sorted(self.subs):
  949. innermask = outermask | self.mask
  950. innerbits = outerbits | b
  951. output(ind, 'case ', str_case(b), ':\n')
  952. output(ind, ' /* ',
  953. str_match_bits(innerbits, innermask), ' */\n')
  954. s.output_code(i + 4, extracted, innerbits, innermask)
  955. output(ind, '}\n')
  956. output(ind, 'return insn;\n')
  957. # end SizeTree
  958. class SizeLeaf:
  959. """Class representing a leaf node in a size decode tree"""
  960. def __init__(self, m, w):
  961. self.mask = m
  962. self.width = w
  963. def str1(self, i):
  964. ind = str_indent(i)
  965. return '{0}{1:08x}'.format(ind, self.mask)
  966. def __str__(self):
  967. return self.str1(0)
  968. def output_code(self, i, extracted, outerbits, outermask):
  969. global decode_function
  970. ind = str_indent(i)
  971. # If we need to load more bytes, do so now.
  972. if extracted < self.width:
  973. output(ind, 'insn = ', decode_function,
  974. '_load_bytes(ctx, insn, {0}, {1});\n'
  975. .format(extracted // 8, self.width // 8));
  976. extracted = self.width
  977. output(ind, 'return insn;\n')
  978. # end SizeLeaf
  979. def build_size_tree(pats, width, outerbits, outermask):
  980. global insnwidth
  981. # Collect the mask of bits that are fixed in this width
  982. innermask = 0xff << (insnwidth - width)
  983. innermask &= ~outermask
  984. minwidth = None
  985. onewidth = True
  986. for i in pats:
  987. innermask &= i.fixedmask
  988. if minwidth is None:
  989. minwidth = i.width
  990. elif minwidth != i.width:
  991. onewidth = False;
  992. if minwidth < i.width:
  993. minwidth = i.width
  994. if onewidth:
  995. return SizeLeaf(innermask, minwidth)
  996. if innermask == 0:
  997. if width < minwidth:
  998. return build_size_tree(pats, width + 8, outerbits, outermask)
  999. pnames = []
  1000. for p in pats:
  1001. pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
  1002. error_with_file(pats[0].file, pats[0].lineno,
  1003. 'overlapping patterns size {0}:'.format(width), pnames)
  1004. bins = {}
  1005. for i in pats:
  1006. fb = i.fixedbits & innermask
  1007. if fb in bins:
  1008. bins[fb].append(i)
  1009. else:
  1010. bins[fb] = [i]
  1011. fullmask = outermask | innermask
  1012. lens = sorted(bins.keys())
  1013. if len(lens) == 1:
  1014. b = lens[0]
  1015. return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
  1016. r = SizeTree(innermask, width)
  1017. for b, l in bins.items():
  1018. s = build_size_tree(l, width, b | outerbits, fullmask)
  1019. r.subs.append((b, s))
  1020. return r
  1021. # end build_size_tree
  1022. def prop_size(tree):
  1023. """Propagate minimum widths up the decode size tree"""
  1024. if isinstance(tree, SizeTree):
  1025. min = None
  1026. for (b, s) in tree.subs:
  1027. width = prop_size(s)
  1028. if min is None or min > width:
  1029. min = width
  1030. assert min >= tree.width
  1031. tree.width = min
  1032. else:
  1033. min = tree.width
  1034. return min
  1035. # end prop_size
  1036. def main():
  1037. global arguments
  1038. global formats
  1039. global allpatterns
  1040. global translate_scope
  1041. global translate_prefix
  1042. global output_fd
  1043. global output_file
  1044. global input_file
  1045. global insnwidth
  1046. global insntype
  1047. global insnmask
  1048. global decode_function
  1049. global variablewidth
  1050. global anyextern
  1051. decode_scope = 'static '
  1052. long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
  1053. 'static-decode=', 'varinsnwidth=']
  1054. try:
  1055. (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
  1056. except getopt.GetoptError as err:
  1057. error(0, err)
  1058. for o, a in opts:
  1059. if o in ('-o', '--output'):
  1060. output_file = a
  1061. elif o == '--decode':
  1062. decode_function = a
  1063. decode_scope = ''
  1064. elif o == '--static-decode':
  1065. decode_function = a
  1066. elif o == '--translate':
  1067. translate_prefix = a
  1068. translate_scope = ''
  1069. elif o in ('-w', '--insnwidth', '--varinsnwidth'):
  1070. if o == '--varinsnwidth':
  1071. variablewidth = True
  1072. insnwidth = int(a)
  1073. if insnwidth == 16:
  1074. insntype = 'uint16_t'
  1075. insnmask = 0xffff
  1076. elif insnwidth != 32:
  1077. error(0, 'cannot handle insns of width', insnwidth)
  1078. else:
  1079. assert False, 'unhandled option'
  1080. if len(args) < 1:
  1081. error(0, 'missing input file')
  1082. toppat = ExcMultiPattern(0)
  1083. for filename in args:
  1084. input_file = filename
  1085. f = open(filename, 'r')
  1086. parse_file(f, toppat)
  1087. f.close()
  1088. # We do not want to compute masks for toppat, because those masks
  1089. # are used as a starting point for build_tree. For toppat, we must
  1090. # insist that decode begins from naught.
  1091. for i in toppat.pats:
  1092. i.prop_masks()
  1093. toppat.build_tree()
  1094. toppat.prop_format()
  1095. if variablewidth:
  1096. for i in toppat.pats:
  1097. i.prop_width()
  1098. stree = build_size_tree(toppat.pats, 8, 0, 0)
  1099. prop_size(stree)
  1100. if output_file:
  1101. output_fd = open(output_file, 'w')
  1102. else:
  1103. output_fd = sys.stdout
  1104. output_autogen()
  1105. for n in sorted(arguments.keys()):
  1106. f = arguments[n]
  1107. f.output_def()
  1108. # A single translate function can be invoked for different patterns.
  1109. # Make sure that the argument sets are the same, and declare the
  1110. # function only once.
  1111. #
  1112. # If we're sharing formats, we're likely also sharing trans_* functions,
  1113. # but we can't tell which ones. Prevent issues from the compiler by
  1114. # suppressing redundant declaration warnings.
  1115. if anyextern:
  1116. output("#pragma GCC diagnostic push\n",
  1117. "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
  1118. "#ifdef __clang__\n"
  1119. "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
  1120. "#endif\n\n")
  1121. out_pats = {}
  1122. for i in allpatterns:
  1123. if i.name in out_pats:
  1124. p = out_pats[i.name]
  1125. if i.base.base != p.base.base:
  1126. error(0, i.name, ' has conflicting argument sets')
  1127. else:
  1128. i.output_decl()
  1129. out_pats[i.name] = i
  1130. output('\n')
  1131. if anyextern:
  1132. output("#pragma GCC diagnostic pop\n\n")
  1133. for n in sorted(formats.keys()):
  1134. f = formats[n]
  1135. f.output_extract()
  1136. output(decode_scope, 'bool ', decode_function,
  1137. '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
  1138. i4 = str_indent(4)
  1139. if len(allpatterns) != 0:
  1140. output(i4, 'union {\n')
  1141. for n in sorted(arguments.keys()):
  1142. f = arguments[n]
  1143. output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
  1144. output(i4, '} u;\n\n')
  1145. toppat.output_code(4, False, 0, 0)
  1146. output(i4, 'return false;\n')
  1147. output('}\n')
  1148. if variablewidth:
  1149. output('\n', decode_scope, insntype, ' ', decode_function,
  1150. '_load(DisasContext *ctx)\n{\n',
  1151. ' ', insntype, ' insn = 0;\n\n')
  1152. stree.output_code(4, 0, 0, 0)
  1153. output('}\n')
  1154. if output_file:
  1155. output_fd.close()
  1156. # end main
  1157. if __name__ == '__main__':
  1158. main()