decodetree.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2018 Linaro Limited
  3. #
  4. # This library is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU Lesser General Public
  6. # License as published by the Free Software Foundation; either
  7. # version 2.1 of the License, or (at your option) any later version.
  8. #
  9. # This library is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # Lesser General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Lesser General Public
  15. # License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16. #
  17. #
  18. # Generate a decoding tree from a specification file.
  19. # See the syntax and semantics in docs/devel/decodetree.rst.
  20. #
  21. import io
  22. import os
  23. import re
  24. import sys
  25. import getopt
# Width of one instruction in bits; bounds field positions and sets the
# zero-padding used by whex().
insnwidth = 32
# Numeric suffix of the extract/deposit helper names emitted into the C code.
bitop_width = 32
# Mask with one bit set for every bit of the instruction.
insnmask = 0xffffffff
# When true, parse_generic pads definitions shorter than insnwidth (if a
# whole number of bytes) instead of rejecting them.
variablewidth = False
# Named fields, keyed by field name (filled in by parse_field).
fields = {}
# Argument sets, keyed by name (parse_arguments / infer_argument_set).
arguments = {}
# Formats, keyed by name (parse_generic / infer_format).
formats = {}
# Every Pattern created by parse_generic, in creation order.
allpatterns = []
# Set once any argument set is declared with !extern.
anyextern = False
# When true, error_with_file exits with status 0 instead of 1.
testforerror = False
# Prefix of the translate function names emitted by Pattern.
translate_prefix = 'trans'
# Text emitted in front of 'bool' in each translate function declaration.
translate_scope = 'static '
# Input file name used in error messages and recorded on parsed objects.
input_file = ''
# Output file name; error_with_file removes it on failure.
output_file = None
# Stream that output() writes to.
output_fd = None
# C type of the 'insn' parameter of the generated extract functions.
insntype = 'uint32_t'
# Prefix used for generated extract functions and inferred names.
decode_function = 'decode'
# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
  50. def error_with_file(file, lineno, *args):
  51. """Print an error message from file:line and args and exit."""
  52. global output_file
  53. global output_fd
  54. prefix = ''
  55. if file:
  56. prefix += f'{file}:'
  57. if lineno:
  58. prefix += f'{lineno}:'
  59. if prefix:
  60. prefix += ' '
  61. print(prefix, end='error: ', file=sys.stderr)
  62. print(*args, file=sys.stderr)
  63. if output_file and output_fd:
  64. output_fd.close()
  65. # Do not try to remove e.g. -o /dev/null
  66. if not output_file.startswith("/dev"):
  67. try:
  68. os.remove(output_file)
  69. except PermissionError:
  70. pass
  71. exit(0 if testforerror else 1)
  72. # end error_with_file
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    error_with_file(input_file, lineno, *args)
# end error
  76. def output(*args):
  77. global output_fd
  78. for a in args:
  79. output_fd.write(a)
def output_autogen():
    """Emit the 'autogenerated' banner comment followed by a blank line."""
    output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
  82. def str_indent(c):
  83. """Return a string with C spaces"""
  84. return ' ' * c
  85. def str_fields(fields):
  86. """Return a string uniquely identifying FIELDS"""
  87. r = ''
  88. for n in sorted(fields.keys()):
  89. r += '_' + n
  90. return r[1:]
  91. def whex(val):
  92. """Return a hex string for val padded for insnwidth"""
  93. global insnwidth
  94. return f'0x{val:0{insnwidth // 4}x}'
  95. def whexC(val):
  96. """Return a hex string for val padded for insnwidth,
  97. and with the proper suffix for a C constant."""
  98. suffix = ''
  99. if val >= 0x100000000:
  100. suffix = 'ull'
  101. elif val >= 0x80000000:
  102. suffix = 'u'
  103. return whex(val) + suffix
  104. def str_match_bits(bits, mask):
  105. """Return a string pretty-printing BITS/MASK"""
  106. global insnwidth
  107. i = 1 << (insnwidth - 1)
  108. space = 0x01010100
  109. r = ''
  110. while i != 0:
  111. if i & mask:
  112. if i & bits:
  113. r += '1'
  114. else:
  115. r += '0'
  116. else:
  117. r += '.'
  118. if i & space:
  119. r += ' '
  120. i >>= 1
  121. return r
  122. def is_pow2(x):
  123. """Return true iff X is equal to a power of 2."""
  124. return (x & (x - 1)) == 0
  125. def ctz(x):
  126. """Return the number of times 2 factors into X."""
  127. assert x != 0
  128. r = 0
  129. while ((x >> r) & 1) == 0:
  130. r += 1
  131. return r
  132. def is_contiguous(bits):
  133. if bits == 0:
  134. return -1
  135. shift = ctz(bits)
  136. if is_pow2((bits >> shift) + 1):
  137. return shift
  138. else:
  139. return -1
  140. def eq_fields_for_args(flds_a, arg):
  141. if len(flds_a) != len(arg.fields):
  142. return False
  143. # Only allow inference on default types
  144. for t in arg.types:
  145. if t != 'int':
  146. return False
  147. for k, a in flds_a.items():
  148. if k not in arg.fields:
  149. return False
  150. return True
  151. def eq_fields_for_fmts(flds_a, flds_b):
  152. if len(flds_a) != len(flds_b):
  153. return False
  154. for k, a in flds_a.items():
  155. if k not in flds_b:
  156. return False
  157. b = flds_b[k]
  158. if a.__class__ != b.__class__ or a != b:
  159. return False
  160. return True
  161. class Field:
  162. """Class representing a simple instruction field"""
  163. def __init__(self, sign, pos, len):
  164. self.sign = sign
  165. self.pos = pos
  166. self.len = len
  167. self.mask = ((1 << len) - 1) << pos
  168. def __str__(self):
  169. if self.sign:
  170. s = 's'
  171. else:
  172. s = ''
  173. return str(self.pos) + ':' + s + str(self.len)
  174. def str_extract(self, lvalue_formatter):
  175. global bitop_width
  176. s = 's' if self.sign else ''
  177. return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
  178. def __eq__(self, other):
  179. return self.sign == other.sign and self.mask == other.mask
  180. def __ne__(self, other):
  181. return not self.__eq__(other)
  182. # end Field
  183. class MultiField:
  184. """Class representing a compound instruction field"""
  185. def __init__(self, subs, mask):
  186. self.subs = subs
  187. self.sign = subs[0].sign
  188. self.mask = mask
  189. def __str__(self):
  190. return str(self.subs)
  191. def str_extract(self, lvalue_formatter):
  192. global bitop_width
  193. ret = '0'
  194. pos = 0
  195. for f in reversed(self.subs):
  196. ext = f.str_extract(lvalue_formatter)
  197. if pos == 0:
  198. ret = ext
  199. else:
  200. ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
  201. pos += f.len
  202. return ret
  203. def __ne__(self, other):
  204. if len(self.subs) != len(other.subs):
  205. return True
  206. for a, b in zip(self.subs, other.subs):
  207. if a.__class__ != b.__class__ or a != b:
  208. return True
  209. return False
  210. def __eq__(self, other):
  211. return not self.__ne__(other)
  212. # end MultiField
  213. class ConstField:
  214. """Class representing an argument field with constant value"""
  215. def __init__(self, value):
  216. self.value = value
  217. self.mask = 0
  218. self.sign = value < 0
  219. def __str__(self):
  220. return str(self.value)
  221. def str_extract(self, lvalue_formatter):
  222. return str(self.value)
  223. def __cmp__(self, other):
  224. return self.value - other.value
  225. # end ConstField
  226. class FunctionField:
  227. """Class representing a field passed through a function"""
  228. def __init__(self, func, base):
  229. self.mask = base.mask
  230. self.sign = base.sign
  231. self.base = base
  232. self.func = func
  233. def __str__(self):
  234. return self.func + '(' + str(self.base) + ')'
  235. def str_extract(self, lvalue_formatter):
  236. return (self.func + '(ctx, '
  237. + self.base.str_extract(lvalue_formatter) + ')')
  238. def __eq__(self, other):
  239. return self.func == other.func and self.base == other.base
  240. def __ne__(self, other):
  241. return not self.__eq__(other)
  242. # end FunctionField
  243. class ParameterField:
  244. """Class representing a pseudo-field read from a function"""
  245. def __init__(self, func):
  246. self.mask = 0
  247. self.sign = 0
  248. self.func = func
  249. def __str__(self):
  250. return self.func
  251. def str_extract(self, lvalue_formatter):
  252. return self.func + '(ctx)'
  253. def __eq__(self, other):
  254. return self.func == other.func
  255. def __ne__(self, other):
  256. return not self.__eq__(other)
  257. # end ParameterField
  258. class Arguments:
  259. """Class representing the extracted fields of a format"""
  260. def __init__(self, nm, flds, types, extern):
  261. self.name = nm
  262. self.extern = extern
  263. self.fields = flds
  264. self.types = types
  265. def __str__(self):
  266. return self.name + ' ' + str(self.fields)
  267. def struct_name(self):
  268. return 'arg_' + self.name
  269. def output_def(self):
  270. if not self.extern:
  271. output('typedef struct {\n')
  272. for (n, t) in zip(self.fields, self.types):
  273. output(f' {t} {n};\n')
  274. output('} ', self.struct_name(), ';\n\n')
  275. # end Arguments
  276. class General:
  277. """Common code between instruction formats and instruction patterns"""
  278. def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
  279. self.name = name
  280. self.file = input_file
  281. self.lineno = lineno
  282. self.base = base
  283. self.fixedbits = fixb
  284. self.fixedmask = fixm
  285. self.undefmask = udfm
  286. self.fieldmask = fldm
  287. self.fields = flds
  288. self.width = w
  289. def __str__(self):
  290. return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  291. def str1(self, i):
  292. return str_indent(i) + self.__str__()
  293. def output_fields(self, indent, lvalue_formatter):
  294. for n, f in self.fields.items():
  295. output(indent, lvalue_formatter(n), ' = ',
  296. f.str_extract(lvalue_formatter), ';\n')
  297. # end General
  298. class Format(General):
  299. """Class representing an instruction format"""
  300. def extract_name(self):
  301. global decode_function
  302. return decode_function + '_extract_' + self.name
  303. def output_extract(self):
  304. output('static void ', self.extract_name(), '(DisasContext *ctx, ',
  305. self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
  306. self.output_fields(str_indent(4), lambda n: 'a->' + n)
  307. output('}\n\n')
  308. # end Format
  309. class Pattern(General):
  310. """Class representing an instruction pattern"""
  311. def output_decl(self):
  312. global translate_scope
  313. global translate_prefix
  314. output('typedef ', self.base.base.struct_name(),
  315. ' arg_', self.name, ';\n')
  316. output(translate_scope, 'bool ', translate_prefix, '_', self.name,
  317. '(DisasContext *ctx, arg_', self.name, ' *a);\n')
  318. def output_code(self, i, extracted, outerbits, outermask):
  319. global translate_prefix
  320. ind = str_indent(i)
  321. arg = self.base.base.name
  322. output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
  323. if not extracted:
  324. output(ind, self.base.extract_name(),
  325. '(ctx, &u.f_', arg, ', insn);\n')
  326. self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
  327. output(ind, 'if (', translate_prefix, '_', self.name,
  328. '(ctx, &u.f_', arg, ')) return true;\n')
  329. # Normal patterns do not have children.
  330. def build_tree(self):
  331. return
  332. def prop_masks(self):
  333. return
  334. def prop_format(self):
  335. return
  336. def prop_width(self):
  337. return
  338. # end Pattern
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        # General.__init__ is not called: a group has no name or fields of
        # its own, so only the attributes below are initialised.
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        """Emit the declarations of every child pattern."""
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute this group's fixedbits/fixedmask/undefmask from its
        children, after first propagating masks within each child."""
        global insnmask

        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Widen fixedmask until all fixedbits match
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    # Drop the bits on which the children disagree and
                    # rescan from the start with the reduced mask.
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                # No disagreement found in a full pass: we are done.
                repeat = False

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        """Build the decode tree of every child pattern."""
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        """Propagate formats within every child pattern."""
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Set self.width to the children's common width; it is an error
        for children to have differing widths."""
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width
# end MultiPattern
  401. class IncMultiPattern(MultiPattern):
  402. """Class representing an overlapping set of instruction patterns"""
  403. def output_code(self, i, extracted, outerbits, outermask):
  404. global translate_prefix
  405. ind = str_indent(i)
  406. for p in self.pats:
  407. if outermask != p.fixedmask:
  408. innermask = p.fixedmask & ~outermask
  409. innerbits = p.fixedbits & ~outermask
  410. output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
  411. output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
  412. p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
  413. output(ind, '}\n')
  414. else:
  415. p.output_code(i, extracted, p.fixedbits, p.fixedmask)
  416. def build_tree(self):
  417. if not self.pats:
  418. error_with_file(self.file, self.lineno, 'empty pattern group')
  419. super().build_tree()
  420. #end IncMultiPattern
  421. class Tree:
  422. """Class representing a node in a decode tree"""
  423. def __init__(self, fm, tm):
  424. self.fixedmask = fm
  425. self.thismask = tm
  426. self.subs = []
  427. self.base = None
  428. def str1(self, i):
  429. ind = str_indent(i)
  430. r = ind + whex(self.fixedmask)
  431. if self.format:
  432. r += ' ' + self.format.name
  433. r += ' [\n'
  434. for (b, s) in self.subs:
  435. r += ind + f' {whex(b)}:\n'
  436. r += s.str1(i + 4) + '\n'
  437. r += ind + ']'
  438. return r
  439. def __str__(self):
  440. return self.str1(0)
  441. def output_code(self, i, extracted, outerbits, outermask):
  442. ind = str_indent(i)
  443. # If we identified all nodes below have the same format,
  444. # extract the fields now.
  445. if not extracted and self.base:
  446. output(ind, self.base.extract_name(),
  447. '(ctx, &u.f_', self.base.base.name, ', insn);\n')
  448. extracted = True
  449. # Attempt to aid the compiler in producing compact switch statements.
  450. # If the bits in the mask are contiguous, extract them.
  451. sh = is_contiguous(self.thismask)
  452. if sh > 0:
  453. # Propagate SH down into the local functions.
  454. def str_switch(b, sh=sh):
  455. return f'(insn >> {sh}) & {b >> sh:#x}'
  456. def str_case(b, sh=sh):
  457. return hex(b >> sh)
  458. else:
  459. def str_switch(b):
  460. return f'insn & {whexC(b)}'
  461. def str_case(b):
  462. return whexC(b)
  463. output(ind, 'switch (', str_switch(self.thismask), ') {\n')
  464. for b, s in sorted(self.subs):
  465. assert (self.thismask & ~s.fixedmask) == 0
  466. innermask = outermask | self.thismask
  467. innerbits = outerbits | b
  468. output(ind, 'case ', str_case(b), ':\n')
  469. output(ind, ' /* ',
  470. str_match_bits(innerbits, innermask), ' */\n')
  471. s.output_code(i + 4, extracted, innerbits, innermask)
  472. output(ind, ' break;\n')
  473. output(ind, '}\n')
  474. # end Tree
class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree that distinguishes the patterns in PATS.

        OUTERMASK/OUTERBITS describe the bits already decided by the
        callers.  It is an error if several patterns remain with no
        further fixed bits in common to tell them apart.
        """
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for i in pats:
            innermask &= i.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for i in pats:
            fb = i.fixedbits & innermask
            if fb in bins:
                bins[fb].append(i)
            else:
                bins[fb] = [i]

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        """Build the children's trees, then the Tree for this group."""
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        """Propagate formats in the children, then through our own tree."""
        super().prop_format()
        self.__prop_format(self.tree)
# end ExcMultiPattern
  540. def parse_field(lineno, name, toks):
  541. """Parse one instruction field from TOKS at LINENO"""
  542. global fields
  543. global insnwidth
  544. # A "simple" field will have only one entry;
  545. # a "multifield" will have several.
  546. subs = []
  547. width = 0
  548. func = None
  549. for t in toks:
  550. if re.match('^!function=', t):
  551. if func:
  552. error(lineno, 'duplicate function')
  553. func = t.split('=')
  554. func = func[1]
  555. continue
  556. if re.fullmatch('[0-9]+:s[0-9]+', t):
  557. # Signed field extract
  558. subtoks = t.split(':s')
  559. sign = True
  560. elif re.fullmatch('[0-9]+:[0-9]+', t):
  561. # Unsigned field extract
  562. subtoks = t.split(':')
  563. sign = False
  564. else:
  565. error(lineno, f'invalid field token "{t}"')
  566. po = int(subtoks[0])
  567. le = int(subtoks[1])
  568. if po + le > insnwidth:
  569. error(lineno, f'field {t} too large')
  570. f = Field(sign, po, le)
  571. subs.append(f)
  572. width += le
  573. if width > insnwidth:
  574. error(lineno, 'field too large')
  575. if len(subs) == 0:
  576. if func:
  577. f = ParameterField(func)
  578. else:
  579. error(lineno, 'field with no value')
  580. else:
  581. if len(subs) == 1:
  582. f = subs[0]
  583. else:
  584. mask = 0
  585. for s in subs:
  586. if mask & s.mask:
  587. error(lineno, 'field components overlap')
  588. mask |= s.mask
  589. f = MultiField(subs, mask)
  590. if func:
  591. f = FunctionField(func, f)
  592. if name in fields:
  593. error(lineno, 'duplicate field', name)
  594. fields[name] = f
  595. # end parse_field
  596. def parse_arguments(lineno, name, toks):
  597. """Parse one argument set from TOKS at LINENO"""
  598. global arguments
  599. global re_C_ident
  600. global anyextern
  601. flds = []
  602. types = []
  603. extern = False
  604. for n in toks:
  605. if re.fullmatch('!extern', n):
  606. extern = True
  607. anyextern = True
  608. continue
  609. if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
  610. (n, t) = n.split(':')
  611. elif re.fullmatch(re_C_ident, n):
  612. t = 'int'
  613. else:
  614. error(lineno, f'invalid argument set token "{n}"')
  615. if n in flds:
  616. error(lineno, f'duplicate argument "{n}"')
  617. flds.append(n)
  618. types.append(t)
  619. if name in arguments:
  620. error(lineno, 'duplicate argument set', name)
  621. arguments[name] = Arguments(name, flds, types, extern)
  622. # end parse_arguments
  623. def lookup_field(lineno, name):
  624. global fields
  625. if name in fields:
  626. return fields[name]
  627. error(lineno, 'undefined field', name)
def add_field(lineno, flds, new_name, f):
    """Store field F under NEW_NAME in the dict FLDS and return FLDS.

    A name already present in FLDS is reported as a duplicate at LINENO.
    """
    if new_name in flds:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
def add_field_byname(lineno, flds, new_name, old_name):
    """Look up the global field OLD_NAME and add it to FLDS as NEW_NAME."""
    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
  635. def infer_argument_set(flds):
  636. global arguments
  637. global decode_function
  638. for arg in arguments.values():
  639. if eq_fields_for_args(flds, arg):
  640. return arg
  641. name = decode_function + str(len(arguments))
  642. arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
  643. arguments[name] = arg
  644. return arg
  645. def infer_format(arg, fieldmask, flds, width):
  646. global arguments
  647. global formats
  648. global decode_function
  649. const_flds = {}
  650. var_flds = {}
  651. for n, c in flds.items():
  652. if c is ConstField:
  653. const_flds[n] = c
  654. else:
  655. var_flds[n] = c
  656. # Look for an existing format with the same argument set and fields
  657. for fmt in formats.values():
  658. if arg and fmt.base != arg:
  659. continue
  660. if fieldmask != fmt.fieldmask:
  661. continue
  662. if width != fmt.width:
  663. continue
  664. if not eq_fields_for_fmts(flds, fmt.fields):
  665. continue
  666. return (fmt, const_flds)
  667. name = decode_function + '_Fmt_' + str(len(formats))
  668. if not arg:
  669. arg = infer_argument_set(flds)
  670. fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
  671. formats[name] = fmt
  672. return (fmt, const_flds)
  673. # end infer_format
  674. def parse_generic(lineno, parent_pat, name, toks):
  675. """Parse one instruction format from TOKS at LINENO"""
  676. global fields
  677. global arguments
  678. global formats
  679. global allpatterns
  680. global re_arg_ident
  681. global re_fld_ident
  682. global re_fmt_ident
  683. global re_C_ident
  684. global insnwidth
  685. global insnmask
  686. global variablewidth
  687. is_format = parent_pat is None
  688. fixedmask = 0
  689. fixedbits = 0
  690. undefmask = 0
  691. width = 0
  692. flds = {}
  693. arg = None
  694. fmt = None
  695. for t in toks:
  696. # '&Foo' gives a format an explicit argument set.
  697. if re.fullmatch(re_arg_ident, t):
  698. tt = t[1:]
  699. if arg:
  700. error(lineno, 'multiple argument sets')
  701. if tt in arguments:
  702. arg = arguments[tt]
  703. else:
  704. error(lineno, 'undefined argument set', t)
  705. continue
  706. # '@Foo' gives a pattern an explicit format.
  707. if re.fullmatch(re_fmt_ident, t):
  708. tt = t[1:]
  709. if fmt:
  710. error(lineno, 'multiple formats')
  711. if tt in formats:
  712. fmt = formats[tt]
  713. else:
  714. error(lineno, 'undefined format', t)
  715. continue
  716. # '%Foo' imports a field.
  717. if re.fullmatch(re_fld_ident, t):
  718. tt = t[1:]
  719. flds = add_field_byname(lineno, flds, tt, tt)
  720. continue
  721. # 'Foo=%Bar' imports a field with a different name.
  722. if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
  723. (fname, iname) = t.split('=%')
  724. flds = add_field_byname(lineno, flds, fname, iname)
  725. continue
  726. # 'Foo=number' sets an argument field to a constant value
  727. if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
  728. (fname, value) = t.split('=')
  729. value = int(value)
  730. flds = add_field(lineno, flds, fname, ConstField(value))
  731. continue
  732. # Pattern of 0s, 1s, dots and dashes indicate required zeros,
  733. # required ones, or dont-cares.
  734. if re.fullmatch('[01.-]+', t):
  735. shift = len(t)
  736. fms = t.replace('0', '1')
  737. fms = fms.replace('.', '0')
  738. fms = fms.replace('-', '0')
  739. fbs = t.replace('.', '0')
  740. fbs = fbs.replace('-', '0')
  741. ubm = t.replace('1', '0')
  742. ubm = ubm.replace('.', '0')
  743. ubm = ubm.replace('-', '1')
  744. fms = int(fms, 2)
  745. fbs = int(fbs, 2)
  746. ubm = int(ubm, 2)
  747. fixedbits = (fixedbits << shift) | fbs
  748. fixedmask = (fixedmask << shift) | fms
  749. undefmask = (undefmask << shift) | ubm
  750. # Otherwise, fieldname:fieldwidth
  751. elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
  752. (fname, flen) = t.split(':')
  753. sign = False
  754. if flen[0] == 's':
  755. sign = True
  756. flen = flen[1:]
  757. shift = int(flen, 10)
  758. if shift + width > insnwidth:
  759. error(lineno, f'field {fname} exceeds insnwidth')
  760. f = Field(sign, insnwidth - width - shift, shift)
  761. flds = add_field(lineno, flds, fname, f)
  762. fixedbits <<= shift
  763. fixedmask <<= shift
  764. undefmask <<= shift
  765. else:
  766. error(lineno, f'invalid token "{t}"')
  767. width += shift
  768. if variablewidth and width < insnwidth and width % 8 == 0:
  769. shift = insnwidth - width
  770. fixedbits <<= shift
  771. fixedmask <<= shift
  772. undefmask <<= shift
  773. undefmask |= (1 << shift) - 1
  774. # We should have filled in all of the bits of the instruction.
  775. elif not (is_format and width == 0) and width != insnwidth:
  776. error(lineno, f'definition has {width} bits')
  777. # Do not check for fields overlapping fields; one valid usage
  778. # is to be able to duplicate fields via import.
  779. fieldmask = 0
  780. for f in flds.values():
  781. fieldmask |= f.mask
  782. # Fix up what we've parsed to match either a format or a pattern.
  783. if is_format:
  784. # Formats cannot reference formats.
  785. if fmt:
  786. error(lineno, 'format referencing format')
  787. # If an argument set is given, then there should be no fields
  788. # without a place to store it.
  789. if arg:
  790. for f in flds.keys():
  791. if f not in arg.fields:
  792. error(lineno, f'field {f} not in argument set {arg.name}')
  793. else:
  794. arg = infer_argument_set(flds)
  795. if name in formats:
  796. error(lineno, 'duplicate format name', name)
  797. fmt = Format(name, lineno, arg, fixedbits, fixedmask,
  798. undefmask, fieldmask, flds, width)
  799. formats[name] = fmt
  800. else:
  801. # Patterns can reference a format ...
  802. if fmt:
  803. # ... but not an argument simultaneously
  804. if arg:
  805. error(lineno, 'pattern specifies both format and argument set')
  806. if fixedmask & fmt.fixedmask:
  807. error(lineno, 'pattern fixed bits overlap format fixed bits')
  808. if width != fmt.width:
  809. error(lineno, 'pattern uses format of different width')
  810. fieldmask |= fmt.fieldmask
  811. fixedbits |= fmt.fixedbits
  812. fixedmask |= fmt.fixedmask
  813. undefmask |= fmt.undefmask
  814. else:
  815. (fmt, flds) = infer_format(arg, fieldmask, flds, width)
  816. arg = fmt.base
  817. for f in flds.keys():
  818. if f not in arg.fields:
  819. error(lineno, f'field {f} not in argument set {arg.name}')
  820. if f in fmt.fields.keys():
  821. error(lineno, f'field {f} set by format and pattern')
  822. for f in arg.fields:
  823. if f not in flds.keys() and f not in fmt.fields.keys():
  824. error(lineno, f'field {f} not initialized')
  825. pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
  826. undefmask, fieldmask, flds, width)
  827. parent_pat.pats.append(pat)
  828. allpatterns.append(pat)
  829. # Validate the masks that we have assembled.
  830. if fieldmask & fixedmask:
  831. error(lineno, 'fieldmask overlaps fixedmask ',
  832. f'({whex(fieldmask)} & {whex(fixedmask)})')
  833. if fieldmask & undefmask:
  834. error(lineno, 'fieldmask overlaps undefmask ',
  835. f'({whex(fieldmask)} & {whex(undefmask)})')
  836. if fixedmask & undefmask:
  837. error(lineno, 'fixedmask overlaps undefmask ',
  838. f'({whex(fixedmask)} & {whex(undefmask)})')
  839. if not is_format:
  840. allbits = fieldmask | fixedmask | undefmask
  841. if allbits != insnmask:
  842. error(lineno, 'bits left unspecified ',
  843. f'({whex(allbits ^ insnmask)})')
# end parse_generic
  845. def parse_file(f, parent_pat):
  846. """Parse all of the patterns within a file"""
  847. global re_arg_ident
  848. global re_fld_ident
  849. global re_fmt_ident
  850. global re_pat_ident
  851. # Read all of the lines of the file. Concatenate lines
  852. # ending in backslash; discard empty lines and comments.
  853. toks = []
  854. lineno = 0
  855. nesting = 0
  856. nesting_pats = []
  857. for line in f:
  858. lineno += 1
  859. # Expand and strip spaces, to find indent.
  860. line = line.rstrip()
  861. line = line.expandtabs()
  862. len1 = len(line)
  863. line = line.lstrip()
  864. len2 = len(line)
  865. # Discard comments
  866. end = line.find('#')
  867. if end >= 0:
  868. line = line[:end]
  869. t = line.split()
  870. if len(toks) != 0:
  871. # Next line after continuation
  872. toks.extend(t)
  873. else:
  874. # Allow completely blank lines.
  875. if len1 == 0:
  876. continue
  877. indent = len1 - len2
  878. # Empty line due to comment.
  879. if len(t) == 0:
  880. # Indentation must be correct, even for comment lines.
  881. if indent != nesting:
  882. error(lineno, 'indentation ', indent, ' != ', nesting)
  883. continue
  884. start_lineno = lineno
  885. toks = t
  886. # Continuation?
  887. if toks[-1] == '\\':
  888. toks.pop()
  889. continue
  890. name = toks[0]
  891. del toks[0]
  892. # End nesting?
  893. if name == '}' or name == ']':
  894. if len(toks) != 0:
  895. error(start_lineno, 'extra tokens after close brace')
  896. # Make sure { } and [ ] nest properly.
  897. if (name == '}') != isinstance(parent_pat, IncMultiPattern):
  898. error(lineno, 'mismatched close brace')
  899. try:
  900. parent_pat = nesting_pats.pop()
  901. except:
  902. error(lineno, 'extra close brace')
  903. nesting -= 2
  904. if indent != nesting:
  905. error(lineno, 'indentation ', indent, ' != ', nesting)
  906. toks = []
  907. continue
  908. # Everything else should have current indentation.
  909. if indent != nesting:
  910. error(start_lineno, 'indentation ', indent, ' != ', nesting)
  911. # Start nesting?
  912. if name == '{' or name == '[':
  913. if len(toks) != 0:
  914. error(start_lineno, 'extra tokens after open brace')
  915. if name == '{':
  916. nested_pat = IncMultiPattern(start_lineno)
  917. else:
  918. nested_pat = ExcMultiPattern(start_lineno)
  919. parent_pat.pats.append(nested_pat)
  920. nesting_pats.append(parent_pat)
  921. parent_pat = nested_pat
  922. nesting += 2
  923. toks = []
  924. continue
  925. # Determine the type of object needing to be parsed.
  926. if re.fullmatch(re_fld_ident, name):
  927. parse_field(start_lineno, name[1:], toks)
  928. elif re.fullmatch(re_arg_ident, name):
  929. parse_arguments(start_lineno, name[1:], toks)
  930. elif re.fullmatch(re_fmt_ident, name):
  931. parse_generic(start_lineno, None, name[1:], toks)
  932. elif re.fullmatch(re_pat_ident, name):
  933. parse_generic(start_lineno, parent_pat, name, toks)
  934. else:
  935. error(lineno, f'invalid token "{name}"')
  936. toks = []
  937. if nesting != 0:
  938. error(lineno, 'missing close brace')
  939. # end parse_file
  940. class SizeTree:
  941. """Class representing a node in a size decode tree"""
  942. def __init__(self, m, w):
  943. self.mask = m
  944. self.subs = []
  945. self.base = None
  946. self.width = w
  947. def str1(self, i):
  948. ind = str_indent(i)
  949. r = ind + whex(self.mask) + ' [\n'
  950. for (b, s) in self.subs:
  951. r += ind + f' {whex(b)}:\n'
  952. r += s.str1(i + 4) + '\n'
  953. r += ind + ']'
  954. return r
  955. def __str__(self):
  956. return self.str1(0)
  957. def output_code(self, i, extracted, outerbits, outermask):
  958. ind = str_indent(i)
  959. # If we need to load more bytes to test, do so now.
  960. if extracted < self.width:
  961. output(ind, f'insn = {decode_function}_load_bytes',
  962. f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
  963. extracted = self.width
  964. # Attempt to aid the compiler in producing compact switch statements.
  965. # If the bits in the mask are contiguous, extract them.
  966. sh = is_contiguous(self.mask)
  967. if sh > 0:
  968. # Propagate SH down into the local functions.
  969. def str_switch(b, sh=sh):
  970. return f'(insn >> {sh}) & {b >> sh:#x}'
  971. def str_case(b, sh=sh):
  972. return hex(b >> sh)
  973. else:
  974. def str_switch(b):
  975. return f'insn & {whexC(b)}'
  976. def str_case(b):
  977. return whexC(b)
  978. output(ind, 'switch (', str_switch(self.mask), ') {\n')
  979. for b, s in sorted(self.subs):
  980. innermask = outermask | self.mask
  981. innerbits = outerbits | b
  982. output(ind, 'case ', str_case(b), ':\n')
  983. output(ind, ' /* ',
  984. str_match_bits(innerbits, innermask), ' */\n')
  985. s.output_code(i + 4, extracted, innerbits, innermask)
  986. output(ind, '}\n')
  987. output(ind, 'return insn;\n')
  988. # end SizeTree
  989. class SizeLeaf:
  990. """Class representing a leaf node in a size decode tree"""
  991. def __init__(self, m, w):
  992. self.mask = m
  993. self.width = w
  994. def str1(self, i):
  995. return str_indent(i) + whex(self.mask)
  996. def __str__(self):
  997. return self.str1(0)
  998. def output_code(self, i, extracted, outerbits, outermask):
  999. global decode_function
  1000. ind = str_indent(i)
  1001. # If we need to load more bytes, do so now.
  1002. if extracted < self.width:
  1003. output(ind, f'insn = {decode_function}_load_bytes',
  1004. f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
  1005. extracted = self.width
  1006. output(ind, 'return insn;\n')
  1007. # end SizeLeaf
  1008. def build_size_tree(pats, width, outerbits, outermask):
  1009. global insnwidth
  1010. # Collect the mask of bits that are fixed in this width
  1011. innermask = 0xff << (insnwidth - width)
  1012. innermask &= ~outermask
  1013. minwidth = None
  1014. onewidth = True
  1015. for i in pats:
  1016. innermask &= i.fixedmask
  1017. if minwidth is None:
  1018. minwidth = i.width
  1019. elif minwidth != i.width:
  1020. onewidth = False;
  1021. if minwidth < i.width:
  1022. minwidth = i.width
  1023. if onewidth:
  1024. return SizeLeaf(innermask, minwidth)
  1025. if innermask == 0:
  1026. if width < minwidth:
  1027. return build_size_tree(pats, width + 8, outerbits, outermask)
  1028. pnames = []
  1029. for p in pats:
  1030. pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
  1031. error_with_file(pats[0].file, pats[0].lineno,
  1032. f'overlapping patterns size {width}:', pnames)
  1033. bins = {}
  1034. for i in pats:
  1035. fb = i.fixedbits & innermask
  1036. if fb in bins:
  1037. bins[fb].append(i)
  1038. else:
  1039. bins[fb] = [i]
  1040. fullmask = outermask | innermask
  1041. lens = sorted(bins.keys())
  1042. if len(lens) == 1:
  1043. b = lens[0]
  1044. return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
  1045. r = SizeTree(innermask, width)
  1046. for b, l in bins.items():
  1047. s = build_size_tree(l, width, b | outerbits, fullmask)
  1048. r.subs.append((b, s))
  1049. return r
  1050. # end build_size_tree
  1051. def prop_size(tree):
  1052. """Propagate minimum widths up the decode size tree"""
  1053. if isinstance(tree, SizeTree):
  1054. min = None
  1055. for (b, s) in tree.subs:
  1056. width = prop_size(s)
  1057. if min is None or min > width:
  1058. min = width
  1059. assert min >= tree.width
  1060. tree.width = min
  1061. else:
  1062. min = tree.width
  1063. return min
  1064. # end prop_size
  1065. def main():
  1066. global arguments
  1067. global formats
  1068. global allpatterns
  1069. global translate_scope
  1070. global translate_prefix
  1071. global output_fd
  1072. global output_file
  1073. global input_file
  1074. global insnwidth
  1075. global insntype
  1076. global insnmask
  1077. global decode_function
  1078. global bitop_width
  1079. global variablewidth
  1080. global anyextern
  1081. global testforerror
  1082. decode_scope = 'static '
  1083. long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
  1084. 'static-decode=', 'varinsnwidth=', 'test-for-error']
  1085. try:
  1086. (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
  1087. except getopt.GetoptError as err:
  1088. error(0, err)
  1089. for o, a in opts:
  1090. if o in ('-o', '--output'):
  1091. output_file = a
  1092. elif o == '--decode':
  1093. decode_function = a
  1094. decode_scope = ''
  1095. elif o == '--static-decode':
  1096. decode_function = a
  1097. elif o == '--translate':
  1098. translate_prefix = a
  1099. translate_scope = ''
  1100. elif o in ('-w', '--insnwidth', '--varinsnwidth'):
  1101. if o == '--varinsnwidth':
  1102. variablewidth = True
  1103. insnwidth = int(a)
  1104. if insnwidth == 16:
  1105. insntype = 'uint16_t'
  1106. insnmask = 0xffff
  1107. elif insnwidth == 64:
  1108. insntype = 'uint64_t'
  1109. insnmask = 0xffffffffffffffff
  1110. bitop_width = 64
  1111. elif insnwidth != 32:
  1112. error(0, 'cannot handle insns of width', insnwidth)
  1113. elif o == '--test-for-error':
  1114. testforerror = True
  1115. else:
  1116. assert False, 'unhandled option'
  1117. if len(args) < 1:
  1118. error(0, 'missing input file')
  1119. toppat = ExcMultiPattern(0)
  1120. for filename in args:
  1121. input_file = filename
  1122. f = open(filename, 'rt', encoding='utf-8')
  1123. parse_file(f, toppat)
  1124. f.close()
  1125. # We do not want to compute masks for toppat, because those masks
  1126. # are used as a starting point for build_tree. For toppat, we must
  1127. # insist that decode begins from naught.
  1128. for i in toppat.pats:
  1129. i.prop_masks()
  1130. toppat.build_tree()
  1131. toppat.prop_format()
  1132. if variablewidth:
  1133. for i in toppat.pats:
  1134. i.prop_width()
  1135. stree = build_size_tree(toppat.pats, 8, 0, 0)
  1136. prop_size(stree)
  1137. if output_file:
  1138. output_fd = open(output_file, 'wt', encoding='utf-8')
  1139. else:
  1140. output_fd = io.TextIOWrapper(sys.stdout.buffer,
  1141. encoding=sys.stdout.encoding,
  1142. errors="ignore")
  1143. output_autogen()
  1144. for n in sorted(arguments.keys()):
  1145. f = arguments[n]
  1146. f.output_def()
  1147. # A single translate function can be invoked for different patterns.
  1148. # Make sure that the argument sets are the same, and declare the
  1149. # function only once.
  1150. #
  1151. # If we're sharing formats, we're likely also sharing trans_* functions,
  1152. # but we can't tell which ones. Prevent issues from the compiler by
  1153. # suppressing redundant declaration warnings.
  1154. if anyextern:
  1155. output("#pragma GCC diagnostic push\n",
  1156. "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
  1157. "#ifdef __clang__\n"
  1158. "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
  1159. "#endif\n\n")
  1160. out_pats = {}
  1161. for i in allpatterns:
  1162. if i.name in out_pats:
  1163. p = out_pats[i.name]
  1164. if i.base.base != p.base.base:
  1165. error(0, i.name, ' has conflicting argument sets')
  1166. else:
  1167. i.output_decl()
  1168. out_pats[i.name] = i
  1169. output('\n')
  1170. if anyextern:
  1171. output("#pragma GCC diagnostic pop\n\n")
  1172. for n in sorted(formats.keys()):
  1173. f = formats[n]
  1174. f.output_extract()
  1175. output(decode_scope, 'bool ', decode_function,
  1176. '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
  1177. i4 = str_indent(4)
  1178. if len(allpatterns) != 0:
  1179. output(i4, 'union {\n')
  1180. for n in sorted(arguments.keys()):
  1181. f = arguments[n]
  1182. output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
  1183. output(i4, '} u;\n\n')
  1184. toppat.output_code(4, False, 0, 0)
  1185. output(i4, 'return false;\n')
  1186. output('}\n')
  1187. if variablewidth:
  1188. output('\n', decode_scope, insntype, ' ', decode_function,
  1189. '_load(DisasContext *ctx)\n{\n',
  1190. ' ', insntype, ' insn = 0;\n\n')
  1191. stree.output_code(4, 0, 0, 0)
  1192. output('}\n')
  1193. if output_file:
  1194. output_fd.close()
  1195. exit(1 if testforerror else 0)
  1196. # end main
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()