decodetree.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2018 Linaro Limited
  3. #
  4. # This library is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU Lesser General Public
  6. # License as published by the Free Software Foundation; either
  7. # version 2.1 of the License, or (at your option) any later version.
  8. #
  9. # This library is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # Lesser General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Lesser General Public
  15. # License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16. #
  17. #
  18. # Generate a decoding tree from a specification file.
  19. # See the syntax and semantics in docs/devel/decodetree.rst.
  20. #
  21. import io
  22. import os
  23. import re
  24. import sys
  25. import getopt
  26. insnwidth = 32
  27. bitop_width = 32
  28. insnmask = 0xffffffff
  29. variablewidth = False
  30. fields = {}
  31. arguments = {}
  32. formats = {}
  33. allpatterns = []
  34. anyextern = False
  35. translate_prefix = 'trans'
  36. translate_scope = 'static '
  37. input_file = ''
  38. output_file = None
  39. output_fd = None
  40. insntype = 'uint32_t'
  41. decode_function = 'decode'
  42. # An identifier for C.
  43. re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
  44. # Identifiers for Arguments, Fields, Formats and Patterns.
  45. re_arg_ident = '&[a-zA-Z0-9_]*'
  46. re_fld_ident = '%[a-zA-Z0-9_]*'
  47. re_fmt_ident = '@[a-zA-Z0-9_]*'
  48. re_pat_ident = '[a-zA-Z0-9_]*'
  49. def error_with_file(file, lineno, *args):
  50. """Print an error message from file:line and args and exit."""
  51. global output_file
  52. global output_fd
  53. prefix = ''
  54. if file:
  55. prefix += f'{file}:'
  56. if lineno:
  57. prefix += f'{lineno}:'
  58. if prefix:
  59. prefix += ' '
  60. print(prefix, end='error: ', file=sys.stderr)
  61. print(*args, file=sys.stderr)
  62. if output_file and output_fd:
  63. output_fd.close()
  64. os.remove(output_file)
  65. exit(1)
  66. # end error_with_file
  67. def error(lineno, *args):
  68. error_with_file(input_file, lineno, *args)
  69. # end error
  70. def output(*args):
  71. global output_fd
  72. for a in args:
  73. output_fd.write(a)
  74. def output_autogen():
  75. output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
  76. def str_indent(c):
  77. """Return a string with C spaces"""
  78. return ' ' * c
  79. def str_fields(fields):
  80. """Return a string uniquely identifying FIELDS"""
  81. r = ''
  82. for n in sorted(fields.keys()):
  83. r += '_' + n
  84. return r[1:]
  85. def whex(val):
  86. """Return a hex string for val padded for insnwidth"""
  87. global insnwidth
  88. return f'0x{val:0{insnwidth // 4}x}'
  89. def whexC(val):
  90. """Return a hex string for val padded for insnwidth,
  91. and with the proper suffix for a C constant."""
  92. suffix = ''
  93. if val >= 0x100000000:
  94. suffix = 'ull'
  95. elif val >= 0x80000000:
  96. suffix = 'u'
  97. return whex(val) + suffix
  98. def str_match_bits(bits, mask):
  99. """Return a string pretty-printing BITS/MASK"""
  100. global insnwidth
  101. i = 1 << (insnwidth - 1)
  102. space = 0x01010100
  103. r = ''
  104. while i != 0:
  105. if i & mask:
  106. if i & bits:
  107. r += '1'
  108. else:
  109. r += '0'
  110. else:
  111. r += '.'
  112. if i & space:
  113. r += ' '
  114. i >>= 1
  115. return r
  116. def is_pow2(x):
  117. """Return true iff X is equal to a power of 2."""
  118. return (x & (x - 1)) == 0
  119. def ctz(x):
  120. """Return the number of times 2 factors into X."""
  121. assert x != 0
  122. r = 0
  123. while ((x >> r) & 1) == 0:
  124. r += 1
  125. return r
  126. def is_contiguous(bits):
  127. if bits == 0:
  128. return -1
  129. shift = ctz(bits)
  130. if is_pow2((bits >> shift) + 1):
  131. return shift
  132. else:
  133. return -1
  134. def eq_fields_for_args(flds_a, arg):
  135. if len(flds_a) != len(arg.fields):
  136. return False
  137. # Only allow inference on default types
  138. for t in arg.types:
  139. if t != 'int':
  140. return False
  141. for k, a in flds_a.items():
  142. if k not in arg.fields:
  143. return False
  144. return True
  145. def eq_fields_for_fmts(flds_a, flds_b):
  146. if len(flds_a) != len(flds_b):
  147. return False
  148. for k, a in flds_a.items():
  149. if k not in flds_b:
  150. return False
  151. b = flds_b[k]
  152. if a.__class__ != b.__class__ or a != b:
  153. return False
  154. return True
  155. class Field:
  156. """Class representing a simple instruction field"""
  157. def __init__(self, sign, pos, len):
  158. self.sign = sign
  159. self.pos = pos
  160. self.len = len
  161. self.mask = ((1 << len) - 1) << pos
  162. def __str__(self):
  163. if self.sign:
  164. s = 's'
  165. else:
  166. s = ''
  167. return str(self.pos) + ':' + s + str(self.len)
  168. def str_extract(self):
  169. global bitop_width
  170. s = 's' if self.sign else ''
  171. return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
  172. def __eq__(self, other):
  173. return self.sign == other.sign and self.mask == other.mask
  174. def __ne__(self, other):
  175. return not self.__eq__(other)
  176. # end Field
  177. class MultiField:
  178. """Class representing a compound instruction field"""
  179. def __init__(self, subs, mask):
  180. self.subs = subs
  181. self.sign = subs[0].sign
  182. self.mask = mask
  183. def __str__(self):
  184. return str(self.subs)
  185. def str_extract(self):
  186. global bitop_width
  187. ret = '0'
  188. pos = 0
  189. for f in reversed(self.subs):
  190. ext = f.str_extract()
  191. if pos == 0:
  192. ret = ext
  193. else:
  194. ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
  195. pos += f.len
  196. return ret
  197. def __ne__(self, other):
  198. if len(self.subs) != len(other.subs):
  199. return True
  200. for a, b in zip(self.subs, other.subs):
  201. if a.__class__ != b.__class__ or a != b:
  202. return True
  203. return False
  204. def __eq__(self, other):
  205. return not self.__ne__(other)
  206. # end MultiField
  207. class ConstField:
  208. """Class representing an argument field with constant value"""
  209. def __init__(self, value):
  210. self.value = value
  211. self.mask = 0
  212. self.sign = value < 0
  213. def __str__(self):
  214. return str(self.value)
  215. def str_extract(self):
  216. return str(self.value)
  217. def __cmp__(self, other):
  218. return self.value - other.value
  219. # end ConstField
  220. class FunctionField:
  221. """Class representing a field passed through a function"""
  222. def __init__(self, func, base):
  223. self.mask = base.mask
  224. self.sign = base.sign
  225. self.base = base
  226. self.func = func
  227. def __str__(self):
  228. return self.func + '(' + str(self.base) + ')'
  229. def str_extract(self):
  230. return self.func + '(ctx, ' + self.base.str_extract() + ')'
  231. def __eq__(self, other):
  232. return self.func == other.func and self.base == other.base
  233. def __ne__(self, other):
  234. return not self.__eq__(other)
  235. # end FunctionField
  236. class ParameterField:
  237. """Class representing a pseudo-field read from a function"""
  238. def __init__(self, func):
  239. self.mask = 0
  240. self.sign = 0
  241. self.func = func
  242. def __str__(self):
  243. return self.func
  244. def str_extract(self):
  245. return self.func + '(ctx)'
  246. def __eq__(self, other):
  247. return self.func == other.func
  248. def __ne__(self, other):
  249. return not self.__eq__(other)
  250. # end ParameterField
  251. class Arguments:
  252. """Class representing the extracted fields of a format"""
  253. def __init__(self, nm, flds, types, extern):
  254. self.name = nm
  255. self.extern = extern
  256. self.fields = flds
  257. self.types = types
  258. def __str__(self):
  259. return self.name + ' ' + str(self.fields)
  260. def struct_name(self):
  261. return 'arg_' + self.name
  262. def output_def(self):
  263. if not self.extern:
  264. output('typedef struct {\n')
  265. for (n, t) in zip(self.fields, self.types):
  266. output(f' {t} {n};\n')
  267. output('} ', self.struct_name(), ';\n\n')
  268. # end Arguments
  269. class General:
  270. """Common code between instruction formats and instruction patterns"""
  271. def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
  272. self.name = name
  273. self.file = input_file
  274. self.lineno = lineno
  275. self.base = base
  276. self.fixedbits = fixb
  277. self.fixedmask = fixm
  278. self.undefmask = udfm
  279. self.fieldmask = fldm
  280. self.fields = flds
  281. self.width = w
  282. def __str__(self):
  283. return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  284. def str1(self, i):
  285. return str_indent(i) + self.__str__()
  286. # end General
  287. class Format(General):
  288. """Class representing an instruction format"""
  289. def extract_name(self):
  290. global decode_function
  291. return decode_function + '_extract_' + self.name
  292. def output_extract(self):
  293. output('static void ', self.extract_name(), '(DisasContext *ctx, ',
  294. self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
  295. for n, f in self.fields.items():
  296. output(' a->', n, ' = ', f.str_extract(), ';\n')
  297. output('}\n\n')
  298. # end Format
  299. class Pattern(General):
  300. """Class representing an instruction pattern"""
  301. def output_decl(self):
  302. global translate_scope
  303. global translate_prefix
  304. output('typedef ', self.base.base.struct_name(),
  305. ' arg_', self.name, ';\n')
  306. output(translate_scope, 'bool ', translate_prefix, '_', self.name,
  307. '(DisasContext *ctx, arg_', self.name, ' *a);\n')
  308. def output_code(self, i, extracted, outerbits, outermask):
  309. global translate_prefix
  310. ind = str_indent(i)
  311. arg = self.base.base.name
  312. output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
  313. if not extracted:
  314. output(ind, self.base.extract_name(),
  315. '(ctx, &u.f_', arg, ', insn);\n')
  316. for n, f in self.fields.items():
  317. output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
  318. output(ind, 'if (', translate_prefix, '_', self.name,
  319. '(ctx, &u.f_', arg, ')) return true;\n')
  320. # Normal patterns do not have children.
  321. def build_tree(self):
  322. return
  323. def prop_masks(self):
  324. return
  325. def prop_format(self):
  326. return
  327. def prop_width(self):
  328. return
  329. # end Pattern
  330. class MultiPattern(General):
  331. """Class representing a set of instruction patterns"""
  332. def __init__(self, lineno):
  333. self.file = input_file
  334. self.lineno = lineno
  335. self.pats = []
  336. self.base = None
  337. self.fixedbits = 0
  338. self.fixedmask = 0
  339. self.undefmask = 0
  340. self.width = None
  341. def __str__(self):
  342. r = 'group'
  343. if self.fixedbits is not None:
  344. r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  345. return r
  346. def output_decl(self):
  347. for p in self.pats:
  348. p.output_decl()
  349. def prop_masks(self):
  350. global insnmask
  351. fixedmask = insnmask
  352. undefmask = insnmask
  353. # Collect fixedmask/undefmask for all of the children.
  354. for p in self.pats:
  355. p.prop_masks()
  356. fixedmask &= p.fixedmask
  357. undefmask &= p.undefmask
  358. # Widen fixedmask until all fixedbits match
  359. repeat = True
  360. fixedbits = 0
  361. while repeat and fixedmask != 0:
  362. fixedbits = None
  363. for p in self.pats:
  364. thisbits = p.fixedbits & fixedmask
  365. if fixedbits is None:
  366. fixedbits = thisbits
  367. elif fixedbits != thisbits:
  368. fixedmask &= ~(fixedbits ^ thisbits)
  369. break
  370. else:
  371. repeat = False
  372. self.fixedbits = fixedbits
  373. self.fixedmask = fixedmask
  374. self.undefmask = undefmask
  375. def build_tree(self):
  376. for p in self.pats:
  377. p.build_tree()
  378. def prop_format(self):
  379. for p in self.pats:
  380. p.build_tree()
  381. def prop_width(self):
  382. width = None
  383. for p in self.pats:
  384. p.prop_width()
  385. if width is None:
  386. width = p.width
  387. elif width != p.width:
  388. error_with_file(self.file, self.lineno,
  389. 'width mismatch in patterns within braces')
  390. self.width = width
  391. # end MultiPattern
  392. class IncMultiPattern(MultiPattern):
  393. """Class representing an overlapping set of instruction patterns"""
  394. def output_code(self, i, extracted, outerbits, outermask):
  395. global translate_prefix
  396. ind = str_indent(i)
  397. for p in self.pats:
  398. if outermask != p.fixedmask:
  399. innermask = p.fixedmask & ~outermask
  400. innerbits = p.fixedbits & ~outermask
  401. output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
  402. output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
  403. p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
  404. output(ind, '}\n')
  405. else:
  406. p.output_code(i, extracted, p.fixedbits, p.fixedmask)
  407. #end IncMultiPattern
  408. class Tree:
  409. """Class representing a node in a decode tree"""
  410. def __init__(self, fm, tm):
  411. self.fixedmask = fm
  412. self.thismask = tm
  413. self.subs = []
  414. self.base = None
  415. def str1(self, i):
  416. ind = str_indent(i)
  417. r = ind + whex(self.fixedmask)
  418. if self.format:
  419. r += ' ' + self.format.name
  420. r += ' [\n'
  421. for (b, s) in self.subs:
  422. r += ind + f' {whex(b)}:\n'
  423. r += s.str1(i + 4) + '\n'
  424. r += ind + ']'
  425. return r
  426. def __str__(self):
  427. return self.str1(0)
  428. def output_code(self, i, extracted, outerbits, outermask):
  429. ind = str_indent(i)
  430. # If we identified all nodes below have the same format,
  431. # extract the fields now.
  432. if not extracted and self.base:
  433. output(ind, self.base.extract_name(),
  434. '(ctx, &u.f_', self.base.base.name, ', insn);\n')
  435. extracted = True
  436. # Attempt to aid the compiler in producing compact switch statements.
  437. # If the bits in the mask are contiguous, extract them.
  438. sh = is_contiguous(self.thismask)
  439. if sh > 0:
  440. # Propagate SH down into the local functions.
  441. def str_switch(b, sh=sh):
  442. return f'(insn >> {sh}) & {b >> sh:#x}'
  443. def str_case(b, sh=sh):
  444. return hex(b >> sh)
  445. else:
  446. def str_switch(b):
  447. return f'insn & {whexC(b)}'
  448. def str_case(b):
  449. return whexC(b)
  450. output(ind, 'switch (', str_switch(self.thismask), ') {\n')
  451. for b, s in sorted(self.subs):
  452. assert (self.thismask & ~s.fixedmask) == 0
  453. innermask = outermask | self.thismask
  454. innerbits = outerbits | b
  455. output(ind, 'case ', str_case(b), ':\n')
  456. output(ind, ' /* ',
  457. str_match_bits(innerbits, innermask), ' */\n')
  458. s.output_code(i + 4, extracted, innerbits, innermask)
  459. output(ind, ' break;\n')
  460. output(ind, '}\n')
  461. # end Tree
  462. class ExcMultiPattern(MultiPattern):
  463. """Class representing a non-overlapping set of instruction patterns"""
  464. def output_code(self, i, extracted, outerbits, outermask):
  465. # Defer everything to our decomposed Tree node
  466. self.tree.output_code(i, extracted, outerbits, outermask)
  467. @staticmethod
  468. def __build_tree(pats, outerbits, outermask):
  469. # Find the intersection of all remaining fixedmask.
  470. innermask = ~outermask & insnmask
  471. for i in pats:
  472. innermask &= i.fixedmask
  473. if innermask == 0:
  474. # Edge condition: One pattern covers the entire insnmask
  475. if len(pats) == 1:
  476. t = Tree(outermask, innermask)
  477. t.subs.append((0, pats[0]))
  478. return t
  479. text = 'overlapping patterns:'
  480. for p in pats:
  481. text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
  482. error_with_file(pats[0].file, pats[0].lineno, text)
  483. fullmask = outermask | innermask
  484. # Sort each element of pats into the bin selected by the mask.
  485. bins = {}
  486. for i in pats:
  487. fb = i.fixedbits & innermask
  488. if fb in bins:
  489. bins[fb].append(i)
  490. else:
  491. bins[fb] = [i]
  492. # We must recurse if any bin has more than one element or if
  493. # the single element in the bin has not been fully matched.
  494. t = Tree(fullmask, innermask)
  495. for b, l in bins.items():
  496. s = l[0]
  497. if len(l) > 1 or s.fixedmask & ~fullmask != 0:
  498. s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
  499. t.subs.append((b, s))
  500. return t
  501. def build_tree(self):
  502. super().prop_format()
  503. self.tree = self.__build_tree(self.pats, self.fixedbits,
  504. self.fixedmask)
  505. @staticmethod
  506. def __prop_format(tree):
  507. """Propagate Format objects into the decode tree"""
  508. # Depth first search.
  509. for (b, s) in tree.subs:
  510. if isinstance(s, Tree):
  511. ExcMultiPattern.__prop_format(s)
  512. # If all entries in SUBS have the same format, then
  513. # propagate that into the tree.
  514. f = None
  515. for (b, s) in tree.subs:
  516. if f is None:
  517. f = s.base
  518. if f is None:
  519. return
  520. if f is not s.base:
  521. return
  522. tree.base = f
  523. def prop_format(self):
  524. super().prop_format()
  525. self.__prop_format(self.tree)
  526. # end ExcMultiPattern
  527. def parse_field(lineno, name, toks):
  528. """Parse one instruction field from TOKS at LINENO"""
  529. global fields
  530. global insnwidth
  531. # A "simple" field will have only one entry;
  532. # a "multifield" will have several.
  533. subs = []
  534. width = 0
  535. func = None
  536. for t in toks:
  537. if re.match('^!function=', t):
  538. if func:
  539. error(lineno, 'duplicate function')
  540. func = t.split('=')
  541. func = func[1]
  542. continue
  543. if re.fullmatch('[0-9]+:s[0-9]+', t):
  544. # Signed field extract
  545. subtoks = t.split(':s')
  546. sign = True
  547. elif re.fullmatch('[0-9]+:[0-9]+', t):
  548. # Unsigned field extract
  549. subtoks = t.split(':')
  550. sign = False
  551. else:
  552. error(lineno, f'invalid field token "{t}"')
  553. po = int(subtoks[0])
  554. le = int(subtoks[1])
  555. if po + le > insnwidth:
  556. error(lineno, f'field {t} too large')
  557. f = Field(sign, po, le)
  558. subs.append(f)
  559. width += le
  560. if width > insnwidth:
  561. error(lineno, 'field too large')
  562. if len(subs) == 0:
  563. if func:
  564. f = ParameterField(func)
  565. else:
  566. error(lineno, 'field with no value')
  567. else:
  568. if len(subs) == 1:
  569. f = subs[0]
  570. else:
  571. mask = 0
  572. for s in subs:
  573. if mask & s.mask:
  574. error(lineno, 'field components overlap')
  575. mask |= s.mask
  576. f = MultiField(subs, mask)
  577. if func:
  578. f = FunctionField(func, f)
  579. if name in fields:
  580. error(lineno, 'duplicate field', name)
  581. fields[name] = f
  582. # end parse_field
  583. def parse_arguments(lineno, name, toks):
  584. """Parse one argument set from TOKS at LINENO"""
  585. global arguments
  586. global re_C_ident
  587. global anyextern
  588. flds = []
  589. types = []
  590. extern = False
  591. for n in toks:
  592. if re.fullmatch('!extern', n):
  593. extern = True
  594. anyextern = True
  595. continue
  596. if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
  597. (n, t) = n.split(':')
  598. elif re.fullmatch(re_C_ident, n):
  599. t = 'int'
  600. else:
  601. error(lineno, f'invalid argument set token "{n}"')
  602. if n in flds:
  603. error(lineno, f'duplicate argument "{n}"')
  604. flds.append(n)
  605. types.append(t)
  606. if name in arguments:
  607. error(lineno, 'duplicate argument set', name)
  608. arguments[name] = Arguments(name, flds, types, extern)
  609. # end parse_arguments
  610. def lookup_field(lineno, name):
  611. global fields
  612. if name in fields:
  613. return fields[name]
  614. error(lineno, 'undefined field', name)
  615. def add_field(lineno, flds, new_name, f):
  616. if new_name in flds:
  617. error(lineno, 'duplicate field', new_name)
  618. flds[new_name] = f
  619. return flds
  620. def add_field_byname(lineno, flds, new_name, old_name):
  621. return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
  622. def infer_argument_set(flds):
  623. global arguments
  624. global decode_function
  625. for arg in arguments.values():
  626. if eq_fields_for_args(flds, arg):
  627. return arg
  628. name = decode_function + str(len(arguments))
  629. arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
  630. arguments[name] = arg
  631. return arg
  632. def infer_format(arg, fieldmask, flds, width):
  633. global arguments
  634. global formats
  635. global decode_function
  636. const_flds = {}
  637. var_flds = {}
  638. for n, c in flds.items():
  639. if c is ConstField:
  640. const_flds[n] = c
  641. else:
  642. var_flds[n] = c
  643. # Look for an existing format with the same argument set and fields
  644. for fmt in formats.values():
  645. if arg and fmt.base != arg:
  646. continue
  647. if fieldmask != fmt.fieldmask:
  648. continue
  649. if width != fmt.width:
  650. continue
  651. if not eq_fields_for_fmts(flds, fmt.fields):
  652. continue
  653. return (fmt, const_flds)
  654. name = decode_function + '_Fmt_' + str(len(formats))
  655. if not arg:
  656. arg = infer_argument_set(flds)
  657. fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
  658. formats[name] = fmt
  659. return (fmt, const_flds)
  660. # end infer_format
  661. def parse_generic(lineno, parent_pat, name, toks):
  662. """Parse one instruction format from TOKS at LINENO"""
  663. global fields
  664. global arguments
  665. global formats
  666. global allpatterns
  667. global re_arg_ident
  668. global re_fld_ident
  669. global re_fmt_ident
  670. global re_C_ident
  671. global insnwidth
  672. global insnmask
  673. global variablewidth
  674. is_format = parent_pat is None
  675. fixedmask = 0
  676. fixedbits = 0
  677. undefmask = 0
  678. width = 0
  679. flds = {}
  680. arg = None
  681. fmt = None
  682. for t in toks:
  683. # '&Foo' gives a format an explicit argument set.
  684. if re.fullmatch(re_arg_ident, t):
  685. tt = t[1:]
  686. if arg:
  687. error(lineno, 'multiple argument sets')
  688. if tt in arguments:
  689. arg = arguments[tt]
  690. else:
  691. error(lineno, 'undefined argument set', t)
  692. continue
  693. # '@Foo' gives a pattern an explicit format.
  694. if re.fullmatch(re_fmt_ident, t):
  695. tt = t[1:]
  696. if fmt:
  697. error(lineno, 'multiple formats')
  698. if tt in formats:
  699. fmt = formats[tt]
  700. else:
  701. error(lineno, 'undefined format', t)
  702. continue
  703. # '%Foo' imports a field.
  704. if re.fullmatch(re_fld_ident, t):
  705. tt = t[1:]
  706. flds = add_field_byname(lineno, flds, tt, tt)
  707. continue
  708. # 'Foo=%Bar' imports a field with a different name.
  709. if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
  710. (fname, iname) = t.split('=%')
  711. flds = add_field_byname(lineno, flds, fname, iname)
  712. continue
  713. # 'Foo=number' sets an argument field to a constant value
  714. if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
  715. (fname, value) = t.split('=')
  716. value = int(value)
  717. flds = add_field(lineno, flds, fname, ConstField(value))
  718. continue
  719. # Pattern of 0s, 1s, dots and dashes indicate required zeros,
  720. # required ones, or dont-cares.
  721. if re.fullmatch('[01.-]+', t):
  722. shift = len(t)
  723. fms = t.replace('0', '1')
  724. fms = fms.replace('.', '0')
  725. fms = fms.replace('-', '0')
  726. fbs = t.replace('.', '0')
  727. fbs = fbs.replace('-', '0')
  728. ubm = t.replace('1', '0')
  729. ubm = ubm.replace('.', '0')
  730. ubm = ubm.replace('-', '1')
  731. fms = int(fms, 2)
  732. fbs = int(fbs, 2)
  733. ubm = int(ubm, 2)
  734. fixedbits = (fixedbits << shift) | fbs
  735. fixedmask = (fixedmask << shift) | fms
  736. undefmask = (undefmask << shift) | ubm
  737. # Otherwise, fieldname:fieldwidth
  738. elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
  739. (fname, flen) = t.split(':')
  740. sign = False
  741. if flen[0] == 's':
  742. sign = True
  743. flen = flen[1:]
  744. shift = int(flen, 10)
  745. if shift + width > insnwidth:
  746. error(lineno, f'field {fname} exceeds insnwidth')
  747. f = Field(sign, insnwidth - width - shift, shift)
  748. flds = add_field(lineno, flds, fname, f)
  749. fixedbits <<= shift
  750. fixedmask <<= shift
  751. undefmask <<= shift
  752. else:
  753. error(lineno, f'invalid token "{t}"')
  754. width += shift
  755. if variablewidth and width < insnwidth and width % 8 == 0:
  756. shift = insnwidth - width
  757. fixedbits <<= shift
  758. fixedmask <<= shift
  759. undefmask <<= shift
  760. undefmask |= (1 << shift) - 1
  761. # We should have filled in all of the bits of the instruction.
  762. elif not (is_format and width == 0) and width != insnwidth:
  763. error(lineno, f'definition has {width} bits')
  764. # Do not check for fields overlapping fields; one valid usage
  765. # is to be able to duplicate fields via import.
  766. fieldmask = 0
  767. for f in flds.values():
  768. fieldmask |= f.mask
  769. # Fix up what we've parsed to match either a format or a pattern.
  770. if is_format:
  771. # Formats cannot reference formats.
  772. if fmt:
  773. error(lineno, 'format referencing format')
  774. # If an argument set is given, then there should be no fields
  775. # without a place to store it.
  776. if arg:
  777. for f in flds.keys():
  778. if f not in arg.fields:
  779. error(lineno, f'field {f} not in argument set {arg.name}')
  780. else:
  781. arg = infer_argument_set(flds)
  782. if name in formats:
  783. error(lineno, 'duplicate format name', name)
  784. fmt = Format(name, lineno, arg, fixedbits, fixedmask,
  785. undefmask, fieldmask, flds, width)
  786. formats[name] = fmt
  787. else:
  788. # Patterns can reference a format ...
  789. if fmt:
  790. # ... but not an argument simultaneously
  791. if arg:
  792. error(lineno, 'pattern specifies both format and argument set')
  793. if fixedmask & fmt.fixedmask:
  794. error(lineno, 'pattern fixed bits overlap format fixed bits')
  795. if width != fmt.width:
  796. error(lineno, 'pattern uses format of different width')
  797. fieldmask |= fmt.fieldmask
  798. fixedbits |= fmt.fixedbits
  799. fixedmask |= fmt.fixedmask
  800. undefmask |= fmt.undefmask
  801. else:
  802. (fmt, flds) = infer_format(arg, fieldmask, flds, width)
  803. arg = fmt.base
  804. for f in flds.keys():
  805. if f not in arg.fields:
  806. error(lineno, f'field {f} not in argument set {arg.name}')
  807. if f in fmt.fields.keys():
  808. error(lineno, f'field {f} set by format and pattern')
  809. for f in arg.fields:
  810. if f not in flds.keys() and f not in fmt.fields.keys():
  811. error(lineno, f'field {f} not initialized')
  812. pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
  813. undefmask, fieldmask, flds, width)
  814. parent_pat.pats.append(pat)
  815. allpatterns.append(pat)
  816. # Validate the masks that we have assembled.
  817. if fieldmask & fixedmask:
  818. error(lineno, 'fieldmask overlaps fixedmask ',
  819. f'({whex(fieldmask)} & {whex(fixedmask)})')
  820. if fieldmask & undefmask:
  821. error(lineno, 'fieldmask overlaps undefmask ',
  822. f'({whex(fieldmask)} & {whex(undefmask)})')
  823. if fixedmask & undefmask:
  824. error(lineno, 'fixedmask overlaps undefmask ',
  825. f'({whex(fixedmask)} & {whex(undefmask)})')
  826. if not is_format:
  827. allbits = fieldmask | fixedmask | undefmask
  828. if allbits != insnmask:
  829. error(lineno, 'bits left unspecified ',
  830. f'({whex(allbits ^ insnmask)})')
  831. # end parse_general
  832. def parse_file(f, parent_pat):
  833. """Parse all of the patterns within a file"""
  834. global re_arg_ident
  835. global re_fld_ident
  836. global re_fmt_ident
  837. global re_pat_ident
  838. # Read all of the lines of the file. Concatenate lines
  839. # ending in backslash; discard empty lines and comments.
  840. toks = []
  841. lineno = 0
  842. nesting = 0
  843. nesting_pats = []
  844. for line in f:
  845. lineno += 1
  846. # Expand and strip spaces, to find indent.
  847. line = line.rstrip()
  848. line = line.expandtabs()
  849. len1 = len(line)
  850. line = line.lstrip()
  851. len2 = len(line)
  852. # Discard comments
  853. end = line.find('#')
  854. if end >= 0:
  855. line = line[:end]
  856. t = line.split()
  857. if len(toks) != 0:
  858. # Next line after continuation
  859. toks.extend(t)
  860. else:
  861. # Allow completely blank lines.
  862. if len1 == 0:
  863. continue
  864. indent = len1 - len2
  865. # Empty line due to comment.
  866. if len(t) == 0:
  867. # Indentation must be correct, even for comment lines.
  868. if indent != nesting:
  869. error(lineno, 'indentation ', indent, ' != ', nesting)
  870. continue
  871. start_lineno = lineno
  872. toks = t
  873. # Continuation?
  874. if toks[-1] == '\\':
  875. toks.pop()
  876. continue
  877. name = toks[0]
  878. del toks[0]
  879. # End nesting?
  880. if name == '}' or name == ']':
  881. if len(toks) != 0:
  882. error(start_lineno, 'extra tokens after close brace')
  883. # Make sure { } and [ ] nest properly.
  884. if (name == '}') != isinstance(parent_pat, IncMultiPattern):
  885. error(lineno, 'mismatched close brace')
  886. try:
  887. parent_pat = nesting_pats.pop()
  888. except:
  889. error(lineno, 'extra close brace')
  890. nesting -= 2
  891. if indent != nesting:
  892. error(lineno, 'indentation ', indent, ' != ', nesting)
  893. toks = []
  894. continue
  895. # Everything else should have current indentation.
  896. if indent != nesting:
  897. error(start_lineno, 'indentation ', indent, ' != ', nesting)
  898. # Start nesting?
  899. if name == '{' or name == '[':
  900. if len(toks) != 0:
  901. error(start_lineno, 'extra tokens after open brace')
  902. if name == '{':
  903. nested_pat = IncMultiPattern(start_lineno)
  904. else:
  905. nested_pat = ExcMultiPattern(start_lineno)
  906. parent_pat.pats.append(nested_pat)
  907. nesting_pats.append(parent_pat)
  908. parent_pat = nested_pat
  909. nesting += 2
  910. toks = []
  911. continue
  912. # Determine the type of object needing to be parsed.
  913. if re.fullmatch(re_fld_ident, name):
  914. parse_field(start_lineno, name[1:], toks)
  915. elif re.fullmatch(re_arg_ident, name):
  916. parse_arguments(start_lineno, name[1:], toks)
  917. elif re.fullmatch(re_fmt_ident, name):
  918. parse_generic(start_lineno, None, name[1:], toks)
  919. elif re.fullmatch(re_pat_ident, name):
  920. parse_generic(start_lineno, parent_pat, name, toks)
  921. else:
  922. error(lineno, f'invalid token "{name}"')
  923. toks = []
  924. if nesting != 0:
  925. error(lineno, 'missing close brace')
  926. # end parse_file
  927. class SizeTree:
  928. """Class representing a node in a size decode tree"""
  929. def __init__(self, m, w):
  930. self.mask = m
  931. self.subs = []
  932. self.base = None
  933. self.width = w
  934. def str1(self, i):
  935. ind = str_indent(i)
  936. r = ind + whex(self.mask) + ' [\n'
  937. for (b, s) in self.subs:
  938. r += ind + f' {whex(b)}:\n'
  939. r += s.str1(i + 4) + '\n'
  940. r += ind + ']'
  941. return r
  942. def __str__(self):
  943. return self.str1(0)
  944. def output_code(self, i, extracted, outerbits, outermask):
  945. ind = str_indent(i)
  946. # If we need to load more bytes to test, do so now.
  947. if extracted < self.width:
  948. output(ind, f'insn = {decode_function}_load_bytes',
  949. f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
  950. extracted = self.width
  951. # Attempt to aid the compiler in producing compact switch statements.
  952. # If the bits in the mask are contiguous, extract them.
  953. sh = is_contiguous(self.mask)
  954. if sh > 0:
  955. # Propagate SH down into the local functions.
  956. def str_switch(b, sh=sh):
  957. return f'(insn >> {sh}) & {b >> sh:#x}'
  958. def str_case(b, sh=sh):
  959. return hex(b >> sh)
  960. else:
  961. def str_switch(b):
  962. return f'insn & {whexC(b)}'
  963. def str_case(b):
  964. return whexC(b)
  965. output(ind, 'switch (', str_switch(self.mask), ') {\n')
  966. for b, s in sorted(self.subs):
  967. innermask = outermask | self.mask
  968. innerbits = outerbits | b
  969. output(ind, 'case ', str_case(b), ':\n')
  970. output(ind, ' /* ',
  971. str_match_bits(innerbits, innermask), ' */\n')
  972. s.output_code(i + 4, extracted, innerbits, innermask)
  973. output(ind, '}\n')
  974. output(ind, 'return insn;\n')
  975. # end SizeTree
  976. class SizeLeaf:
  977. """Class representing a leaf node in a size decode tree"""
  978. def __init__(self, m, w):
  979. self.mask = m
  980. self.width = w
  981. def str1(self, i):
  982. return str_indent(i) + whex(self.mask)
  983. def __str__(self):
  984. return self.str1(0)
  985. def output_code(self, i, extracted, outerbits, outermask):
  986. global decode_function
  987. ind = str_indent(i)
  988. # If we need to load more bytes, do so now.
  989. if extracted < self.width:
  990. output(ind, f'insn = {decode_function}_load_bytes',
  991. f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
  992. extracted = self.width
  993. output(ind, 'return insn;\n')
  994. # end SizeLeaf
  995. def build_size_tree(pats, width, outerbits, outermask):
  996. global insnwidth
  997. # Collect the mask of bits that are fixed in this width
  998. innermask = 0xff << (insnwidth - width)
  999. innermask &= ~outermask
  1000. minwidth = None
  1001. onewidth = True
  1002. for i in pats:
  1003. innermask &= i.fixedmask
  1004. if minwidth is None:
  1005. minwidth = i.width
  1006. elif minwidth != i.width:
  1007. onewidth = False;
  1008. if minwidth < i.width:
  1009. minwidth = i.width
  1010. if onewidth:
  1011. return SizeLeaf(innermask, minwidth)
  1012. if innermask == 0:
  1013. if width < minwidth:
  1014. return build_size_tree(pats, width + 8, outerbits, outermask)
  1015. pnames = []
  1016. for p in pats:
  1017. pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
  1018. error_with_file(pats[0].file, pats[0].lineno,
  1019. f'overlapping patterns size {width}:', pnames)
  1020. bins = {}
  1021. for i in pats:
  1022. fb = i.fixedbits & innermask
  1023. if fb in bins:
  1024. bins[fb].append(i)
  1025. else:
  1026. bins[fb] = [i]
  1027. fullmask = outermask | innermask
  1028. lens = sorted(bins.keys())
  1029. if len(lens) == 1:
  1030. b = lens[0]
  1031. return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
  1032. r = SizeTree(innermask, width)
  1033. for b, l in bins.items():
  1034. s = build_size_tree(l, width, b | outerbits, fullmask)
  1035. r.subs.append((b, s))
  1036. return r
  1037. # end build_size_tree
  1038. def prop_size(tree):
  1039. """Propagate minimum widths up the decode size tree"""
  1040. if isinstance(tree, SizeTree):
  1041. min = None
  1042. for (b, s) in tree.subs:
  1043. width = prop_size(s)
  1044. if min is None or min > width:
  1045. min = width
  1046. assert min >= tree.width
  1047. tree.width = min
  1048. else:
  1049. min = tree.width
  1050. return min
  1051. # end prop_size
  1052. def main():
  1053. global arguments
  1054. global formats
  1055. global allpatterns
  1056. global translate_scope
  1057. global translate_prefix
  1058. global output_fd
  1059. global output_file
  1060. global input_file
  1061. global insnwidth
  1062. global insntype
  1063. global insnmask
  1064. global decode_function
  1065. global bitop_width
  1066. global variablewidth
  1067. global anyextern
  1068. decode_scope = 'static '
  1069. long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
  1070. 'static-decode=', 'varinsnwidth=']
  1071. try:
  1072. (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
  1073. except getopt.GetoptError as err:
  1074. error(0, err)
  1075. for o, a in opts:
  1076. if o in ('-o', '--output'):
  1077. output_file = a
  1078. elif o == '--decode':
  1079. decode_function = a
  1080. decode_scope = ''
  1081. elif o == '--static-decode':
  1082. decode_function = a
  1083. elif o == '--translate':
  1084. translate_prefix = a
  1085. translate_scope = ''
  1086. elif o in ('-w', '--insnwidth', '--varinsnwidth'):
  1087. if o == '--varinsnwidth':
  1088. variablewidth = True
  1089. insnwidth = int(a)
  1090. if insnwidth == 16:
  1091. insntype = 'uint16_t'
  1092. insnmask = 0xffff
  1093. elif insnwidth == 64:
  1094. insntype = 'uint64_t'
  1095. insnmask = 0xffffffffffffffff
  1096. bitop_width = 64
  1097. elif insnwidth != 32:
  1098. error(0, 'cannot handle insns of width', insnwidth)
  1099. else:
  1100. assert False, 'unhandled option'
  1101. if len(args) < 1:
  1102. error(0, 'missing input file')
  1103. toppat = ExcMultiPattern(0)
  1104. for filename in args:
  1105. input_file = filename
  1106. f = open(filename, 'rt', encoding='utf-8')
  1107. parse_file(f, toppat)
  1108. f.close()
  1109. # We do not want to compute masks for toppat, because those masks
  1110. # are used as a starting point for build_tree. For toppat, we must
  1111. # insist that decode begins from naught.
  1112. for i in toppat.pats:
  1113. i.prop_masks()
  1114. toppat.build_tree()
  1115. toppat.prop_format()
  1116. if variablewidth:
  1117. for i in toppat.pats:
  1118. i.prop_width()
  1119. stree = build_size_tree(toppat.pats, 8, 0, 0)
  1120. prop_size(stree)
  1121. if output_file:
  1122. output_fd = open(output_file, 'wt', encoding='utf-8')
  1123. else:
  1124. output_fd = io.TextIOWrapper(sys.stdout.buffer,
  1125. encoding=sys.stdout.encoding,
  1126. errors="ignore")
  1127. output_autogen()
  1128. for n in sorted(arguments.keys()):
  1129. f = arguments[n]
  1130. f.output_def()
  1131. # A single translate function can be invoked for different patterns.
  1132. # Make sure that the argument sets are the same, and declare the
  1133. # function only once.
  1134. #
  1135. # If we're sharing formats, we're likely also sharing trans_* functions,
  1136. # but we can't tell which ones. Prevent issues from the compiler by
  1137. # suppressing redundant declaration warnings.
  1138. if anyextern:
  1139. output("#pragma GCC diagnostic push\n",
  1140. "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
  1141. "#ifdef __clang__\n"
  1142. "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
  1143. "#endif\n\n")
  1144. out_pats = {}
  1145. for i in allpatterns:
  1146. if i.name in out_pats:
  1147. p = out_pats[i.name]
  1148. if i.base.base != p.base.base:
  1149. error(0, i.name, ' has conflicting argument sets')
  1150. else:
  1151. i.output_decl()
  1152. out_pats[i.name] = i
  1153. output('\n')
  1154. if anyextern:
  1155. output("#pragma GCC diagnostic pop\n\n")
  1156. for n in sorted(formats.keys()):
  1157. f = formats[n]
  1158. f.output_extract()
  1159. output(decode_scope, 'bool ', decode_function,
  1160. '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
  1161. i4 = str_indent(4)
  1162. if len(allpatterns) != 0:
  1163. output(i4, 'union {\n')
  1164. for n in sorted(arguments.keys()):
  1165. f = arguments[n]
  1166. output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
  1167. output(i4, '} u;\n\n')
  1168. toppat.output_code(4, False, 0, 0)
  1169. output(i4, 'return false;\n')
  1170. output('}\n')
  1171. if variablewidth:
  1172. output('\n', decode_scope, insntype, ' ', decode_function,
  1173. '_load(DisasContext *ctx)\n{\n',
  1174. ' ', insntype, ' insn = 0;\n\n')
  1175. stree.output_code(4, 0, 0, 0)
  1176. output('}\n')
  1177. if output_file:
  1178. output_fd.close()
  1179. # end main
  1180. if __name__ == '__main__':
  1181. main()