decodetree.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2018 Linaro Limited
  3. #
  4. # This library is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU Lesser General Public
  6. # License as published by the Free Software Foundation; either
  7. # version 2.1 of the License, or (at your option) any later version.
  8. #
  9. # This library is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # Lesser General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Lesser General Public
  15. # License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16. #
  17. #
  18. # Generate a decoding tree from a specification file.
  19. # See the syntax and semantics in docs/devel/decodetree.rst.
  20. #
  21. import io
  22. import os
  23. import re
  24. import sys
  25. import getopt
  26. insnwidth = 32
  27. bitop_width = 32
  28. insnmask = 0xffffffff
  29. variablewidth = False
  30. fields = {}
  31. arguments = {}
  32. formats = {}
  33. allpatterns = []
  34. anyextern = False
  35. testforerror = False
  36. translate_prefix = 'trans'
  37. translate_scope = 'static '
  38. input_file = ''
  39. output_file = None
  40. output_fd = None
  41. insntype = 'uint32_t'
  42. decode_function = 'decode'
  43. # An identifier for C.
  44. re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
  45. # Identifiers for Arguments, Fields, Formats and Patterns.
  46. re_arg_ident = '&[a-zA-Z0-9_]*'
  47. re_fld_ident = '%[a-zA-Z0-9_]*'
  48. re_fmt_ident = '@[a-zA-Z0-9_]*'
  49. re_pat_ident = '[a-zA-Z0-9_]*'
  50. def error_with_file(file, lineno, *args):
  51. """Print an error message from file:line and args and exit."""
  52. global output_file
  53. global output_fd
  54. prefix = ''
  55. if file:
  56. prefix += f'{file}:'
  57. if lineno:
  58. prefix += f'{lineno}:'
  59. if prefix:
  60. prefix += ' '
  61. print(prefix, end='error: ', file=sys.stderr)
  62. print(*args, file=sys.stderr)
  63. if output_file and output_fd:
  64. output_fd.close()
  65. os.remove(output_file)
  66. exit(0 if testforerror else 1)
  67. # end error_with_file
  68. def error(lineno, *args):
  69. error_with_file(input_file, lineno, *args)
  70. # end error
  71. def output(*args):
  72. global output_fd
  73. for a in args:
  74. output_fd.write(a)
  75. def output_autogen():
  76. output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
  77. def str_indent(c):
  78. """Return a string with C spaces"""
  79. return ' ' * c
  80. def str_fields(fields):
  81. """Return a string uniquely identifying FIELDS"""
  82. r = ''
  83. for n in sorted(fields.keys()):
  84. r += '_' + n
  85. return r[1:]
  86. def whex(val):
  87. """Return a hex string for val padded for insnwidth"""
  88. global insnwidth
  89. return f'0x{val:0{insnwidth // 4}x}'
  90. def whexC(val):
  91. """Return a hex string for val padded for insnwidth,
  92. and with the proper suffix for a C constant."""
  93. suffix = ''
  94. if val >= 0x100000000:
  95. suffix = 'ull'
  96. elif val >= 0x80000000:
  97. suffix = 'u'
  98. return whex(val) + suffix
  99. def str_match_bits(bits, mask):
  100. """Return a string pretty-printing BITS/MASK"""
  101. global insnwidth
  102. i = 1 << (insnwidth - 1)
  103. space = 0x01010100
  104. r = ''
  105. while i != 0:
  106. if i & mask:
  107. if i & bits:
  108. r += '1'
  109. else:
  110. r += '0'
  111. else:
  112. r += '.'
  113. if i & space:
  114. r += ' '
  115. i >>= 1
  116. return r
  117. def is_pow2(x):
  118. """Return true iff X is equal to a power of 2."""
  119. return (x & (x - 1)) == 0
  120. def ctz(x):
  121. """Return the number of times 2 factors into X."""
  122. assert x != 0
  123. r = 0
  124. while ((x >> r) & 1) == 0:
  125. r += 1
  126. return r
  127. def is_contiguous(bits):
  128. if bits == 0:
  129. return -1
  130. shift = ctz(bits)
  131. if is_pow2((bits >> shift) + 1):
  132. return shift
  133. else:
  134. return -1
  135. def eq_fields_for_args(flds_a, arg):
  136. if len(flds_a) != len(arg.fields):
  137. return False
  138. # Only allow inference on default types
  139. for t in arg.types:
  140. if t != 'int':
  141. return False
  142. for k, a in flds_a.items():
  143. if k not in arg.fields:
  144. return False
  145. return True
  146. def eq_fields_for_fmts(flds_a, flds_b):
  147. if len(flds_a) != len(flds_b):
  148. return False
  149. for k, a in flds_a.items():
  150. if k not in flds_b:
  151. return False
  152. b = flds_b[k]
  153. if a.__class__ != b.__class__ or a != b:
  154. return False
  155. return True
  156. class Field:
  157. """Class representing a simple instruction field"""
  158. def __init__(self, sign, pos, len):
  159. self.sign = sign
  160. self.pos = pos
  161. self.len = len
  162. self.mask = ((1 << len) - 1) << pos
  163. def __str__(self):
  164. if self.sign:
  165. s = 's'
  166. else:
  167. s = ''
  168. return str(self.pos) + ':' + s + str(self.len)
  169. def str_extract(self):
  170. global bitop_width
  171. s = 's' if self.sign else ''
  172. return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
  173. def __eq__(self, other):
  174. return self.sign == other.sign and self.mask == other.mask
  175. def __ne__(self, other):
  176. return not self.__eq__(other)
  177. # end Field
  178. class MultiField:
  179. """Class representing a compound instruction field"""
  180. def __init__(self, subs, mask):
  181. self.subs = subs
  182. self.sign = subs[0].sign
  183. self.mask = mask
  184. def __str__(self):
  185. return str(self.subs)
  186. def str_extract(self):
  187. global bitop_width
  188. ret = '0'
  189. pos = 0
  190. for f in reversed(self.subs):
  191. ext = f.str_extract()
  192. if pos == 0:
  193. ret = ext
  194. else:
  195. ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
  196. pos += f.len
  197. return ret
  198. def __ne__(self, other):
  199. if len(self.subs) != len(other.subs):
  200. return True
  201. for a, b in zip(self.subs, other.subs):
  202. if a.__class__ != b.__class__ or a != b:
  203. return True
  204. return False
  205. def __eq__(self, other):
  206. return not self.__ne__(other)
  207. # end MultiField
  208. class ConstField:
  209. """Class representing an argument field with constant value"""
  210. def __init__(self, value):
  211. self.value = value
  212. self.mask = 0
  213. self.sign = value < 0
  214. def __str__(self):
  215. return str(self.value)
  216. def str_extract(self):
  217. return str(self.value)
  218. def __cmp__(self, other):
  219. return self.value - other.value
  220. # end ConstField
  221. class FunctionField:
  222. """Class representing a field passed through a function"""
  223. def __init__(self, func, base):
  224. self.mask = base.mask
  225. self.sign = base.sign
  226. self.base = base
  227. self.func = func
  228. def __str__(self):
  229. return self.func + '(' + str(self.base) + ')'
  230. def str_extract(self):
  231. return self.func + '(ctx, ' + self.base.str_extract() + ')'
  232. def __eq__(self, other):
  233. return self.func == other.func and self.base == other.base
  234. def __ne__(self, other):
  235. return not self.__eq__(other)
  236. # end FunctionField
  237. class ParameterField:
  238. """Class representing a pseudo-field read from a function"""
  239. def __init__(self, func):
  240. self.mask = 0
  241. self.sign = 0
  242. self.func = func
  243. def __str__(self):
  244. return self.func
  245. def str_extract(self):
  246. return self.func + '(ctx)'
  247. def __eq__(self, other):
  248. return self.func == other.func
  249. def __ne__(self, other):
  250. return not self.__eq__(other)
  251. # end ParameterField
  252. class Arguments:
  253. """Class representing the extracted fields of a format"""
  254. def __init__(self, nm, flds, types, extern):
  255. self.name = nm
  256. self.extern = extern
  257. self.fields = flds
  258. self.types = types
  259. def __str__(self):
  260. return self.name + ' ' + str(self.fields)
  261. def struct_name(self):
  262. return 'arg_' + self.name
  263. def output_def(self):
  264. if not self.extern:
  265. output('typedef struct {\n')
  266. for (n, t) in zip(self.fields, self.types):
  267. output(f' {t} {n};\n')
  268. output('} ', self.struct_name(), ';\n\n')
  269. # end Arguments
  270. class General:
  271. """Common code between instruction formats and instruction patterns"""
  272. def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
  273. self.name = name
  274. self.file = input_file
  275. self.lineno = lineno
  276. self.base = base
  277. self.fixedbits = fixb
  278. self.fixedmask = fixm
  279. self.undefmask = udfm
  280. self.fieldmask = fldm
  281. self.fields = flds
  282. self.width = w
  283. def __str__(self):
  284. return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  285. def str1(self, i):
  286. return str_indent(i) + self.__str__()
  287. # end General
  288. class Format(General):
  289. """Class representing an instruction format"""
  290. def extract_name(self):
  291. global decode_function
  292. return decode_function + '_extract_' + self.name
  293. def output_extract(self):
  294. output('static void ', self.extract_name(), '(DisasContext *ctx, ',
  295. self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
  296. for n, f in self.fields.items():
  297. output(' a->', n, ' = ', f.str_extract(), ';\n')
  298. output('}\n\n')
  299. # end Format
  300. class Pattern(General):
  301. """Class representing an instruction pattern"""
  302. def output_decl(self):
  303. global translate_scope
  304. global translate_prefix
  305. output('typedef ', self.base.base.struct_name(),
  306. ' arg_', self.name, ';\n')
  307. output(translate_scope, 'bool ', translate_prefix, '_', self.name,
  308. '(DisasContext *ctx, arg_', self.name, ' *a);\n')
  309. def output_code(self, i, extracted, outerbits, outermask):
  310. global translate_prefix
  311. ind = str_indent(i)
  312. arg = self.base.base.name
  313. output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
  314. if not extracted:
  315. output(ind, self.base.extract_name(),
  316. '(ctx, &u.f_', arg, ', insn);\n')
  317. for n, f in self.fields.items():
  318. output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
  319. output(ind, 'if (', translate_prefix, '_', self.name,
  320. '(ctx, &u.f_', arg, ')) return true;\n')
  321. # Normal patterns do not have children.
  322. def build_tree(self):
  323. return
  324. def prop_masks(self):
  325. return
  326. def prop_format(self):
  327. return
  328. def prop_width(self):
  329. return
  330. # end Pattern
  331. class MultiPattern(General):
  332. """Class representing a set of instruction patterns"""
  333. def __init__(self, lineno):
  334. self.file = input_file
  335. self.lineno = lineno
  336. self.pats = []
  337. self.base = None
  338. self.fixedbits = 0
  339. self.fixedmask = 0
  340. self.undefmask = 0
  341. self.width = None
  342. def __str__(self):
  343. r = 'group'
  344. if self.fixedbits is not None:
  345. r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
  346. return r
  347. def output_decl(self):
  348. for p in self.pats:
  349. p.output_decl()
  350. def prop_masks(self):
  351. global insnmask
  352. fixedmask = insnmask
  353. undefmask = insnmask
  354. # Collect fixedmask/undefmask for all of the children.
  355. for p in self.pats:
  356. p.prop_masks()
  357. fixedmask &= p.fixedmask
  358. undefmask &= p.undefmask
  359. # Widen fixedmask until all fixedbits match
  360. repeat = True
  361. fixedbits = 0
  362. while repeat and fixedmask != 0:
  363. fixedbits = None
  364. for p in self.pats:
  365. thisbits = p.fixedbits & fixedmask
  366. if fixedbits is None:
  367. fixedbits = thisbits
  368. elif fixedbits != thisbits:
  369. fixedmask &= ~(fixedbits ^ thisbits)
  370. break
  371. else:
  372. repeat = False
  373. self.fixedbits = fixedbits
  374. self.fixedmask = fixedmask
  375. self.undefmask = undefmask
  376. def build_tree(self):
  377. for p in self.pats:
  378. p.build_tree()
  379. def prop_format(self):
  380. for p in self.pats:
  381. p.prop_format()
  382. def prop_width(self):
  383. width = None
  384. for p in self.pats:
  385. p.prop_width()
  386. if width is None:
  387. width = p.width
  388. elif width != p.width:
  389. error_with_file(self.file, self.lineno,
  390. 'width mismatch in patterns within braces')
  391. self.width = width
  392. # end MultiPattern
  393. class IncMultiPattern(MultiPattern):
  394. """Class representing an overlapping set of instruction patterns"""
  395. def output_code(self, i, extracted, outerbits, outermask):
  396. global translate_prefix
  397. ind = str_indent(i)
  398. for p in self.pats:
  399. if outermask != p.fixedmask:
  400. innermask = p.fixedmask & ~outermask
  401. innerbits = p.fixedbits & ~outermask
  402. output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
  403. output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
  404. p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
  405. output(ind, '}\n')
  406. else:
  407. p.output_code(i, extracted, p.fixedbits, p.fixedmask)
  408. def build_tree(self):
  409. if not self.pats:
  410. error_with_file(self.file, self.lineno, 'empty pattern group')
  411. super().build_tree()
  412. #end IncMultiPattern
  413. class Tree:
  414. """Class representing a node in a decode tree"""
  415. def __init__(self, fm, tm):
  416. self.fixedmask = fm
  417. self.thismask = tm
  418. self.subs = []
  419. self.base = None
  420. def str1(self, i):
  421. ind = str_indent(i)
  422. r = ind + whex(self.fixedmask)
  423. if self.format:
  424. r += ' ' + self.format.name
  425. r += ' [\n'
  426. for (b, s) in self.subs:
  427. r += ind + f' {whex(b)}:\n'
  428. r += s.str1(i + 4) + '\n'
  429. r += ind + ']'
  430. return r
  431. def __str__(self):
  432. return self.str1(0)
  433. def output_code(self, i, extracted, outerbits, outermask):
  434. ind = str_indent(i)
  435. # If we identified all nodes below have the same format,
  436. # extract the fields now.
  437. if not extracted and self.base:
  438. output(ind, self.base.extract_name(),
  439. '(ctx, &u.f_', self.base.base.name, ', insn);\n')
  440. extracted = True
  441. # Attempt to aid the compiler in producing compact switch statements.
  442. # If the bits in the mask are contiguous, extract them.
  443. sh = is_contiguous(self.thismask)
  444. if sh > 0:
  445. # Propagate SH down into the local functions.
  446. def str_switch(b, sh=sh):
  447. return f'(insn >> {sh}) & {b >> sh:#x}'
  448. def str_case(b, sh=sh):
  449. return hex(b >> sh)
  450. else:
  451. def str_switch(b):
  452. return f'insn & {whexC(b)}'
  453. def str_case(b):
  454. return whexC(b)
  455. output(ind, 'switch (', str_switch(self.thismask), ') {\n')
  456. for b, s in sorted(self.subs):
  457. assert (self.thismask & ~s.fixedmask) == 0
  458. innermask = outermask | self.thismask
  459. innerbits = outerbits | b
  460. output(ind, 'case ', str_case(b), ':\n')
  461. output(ind, ' /* ',
  462. str_match_bits(innerbits, innermask), ' */\n')
  463. s.output_code(i + 4, extracted, innerbits, innermask)
  464. output(ind, ' break;\n')
  465. output(ind, '}\n')
  466. # end Tree
  467. class ExcMultiPattern(MultiPattern):
  468. """Class representing a non-overlapping set of instruction patterns"""
  469. def output_code(self, i, extracted, outerbits, outermask):
  470. # Defer everything to our decomposed Tree node
  471. self.tree.output_code(i, extracted, outerbits, outermask)
  472. @staticmethod
  473. def __build_tree(pats, outerbits, outermask):
  474. # Find the intersection of all remaining fixedmask.
  475. innermask = ~outermask & insnmask
  476. for i in pats:
  477. innermask &= i.fixedmask
  478. if innermask == 0:
  479. # Edge condition: One pattern covers the entire insnmask
  480. if len(pats) == 1:
  481. t = Tree(outermask, innermask)
  482. t.subs.append((0, pats[0]))
  483. return t
  484. text = 'overlapping patterns:'
  485. for p in pats:
  486. text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
  487. error_with_file(pats[0].file, pats[0].lineno, text)
  488. fullmask = outermask | innermask
  489. # Sort each element of pats into the bin selected by the mask.
  490. bins = {}
  491. for i in pats:
  492. fb = i.fixedbits & innermask
  493. if fb in bins:
  494. bins[fb].append(i)
  495. else:
  496. bins[fb] = [i]
  497. # We must recurse if any bin has more than one element or if
  498. # the single element in the bin has not been fully matched.
  499. t = Tree(fullmask, innermask)
  500. for b, l in bins.items():
  501. s = l[0]
  502. if len(l) > 1 or s.fixedmask & ~fullmask != 0:
  503. s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
  504. t.subs.append((b, s))
  505. return t
  506. def build_tree(self):
  507. super().build_tree()
  508. self.tree = self.__build_tree(self.pats, self.fixedbits,
  509. self.fixedmask)
  510. @staticmethod
  511. def __prop_format(tree):
  512. """Propagate Format objects into the decode tree"""
  513. # Depth first search.
  514. for (b, s) in tree.subs:
  515. if isinstance(s, Tree):
  516. ExcMultiPattern.__prop_format(s)
  517. # If all entries in SUBS have the same format, then
  518. # propagate that into the tree.
  519. f = None
  520. for (b, s) in tree.subs:
  521. if f is None:
  522. f = s.base
  523. if f is None:
  524. return
  525. if f is not s.base:
  526. return
  527. tree.base = f
  528. def prop_format(self):
  529. super().prop_format()
  530. self.__prop_format(self.tree)
  531. # end ExcMultiPattern
  532. def parse_field(lineno, name, toks):
  533. """Parse one instruction field from TOKS at LINENO"""
  534. global fields
  535. global insnwidth
  536. # A "simple" field will have only one entry;
  537. # a "multifield" will have several.
  538. subs = []
  539. width = 0
  540. func = None
  541. for t in toks:
  542. if re.match('^!function=', t):
  543. if func:
  544. error(lineno, 'duplicate function')
  545. func = t.split('=')
  546. func = func[1]
  547. continue
  548. if re.fullmatch('[0-9]+:s[0-9]+', t):
  549. # Signed field extract
  550. subtoks = t.split(':s')
  551. sign = True
  552. elif re.fullmatch('[0-9]+:[0-9]+', t):
  553. # Unsigned field extract
  554. subtoks = t.split(':')
  555. sign = False
  556. else:
  557. error(lineno, f'invalid field token "{t}"')
  558. po = int(subtoks[0])
  559. le = int(subtoks[1])
  560. if po + le > insnwidth:
  561. error(lineno, f'field {t} too large')
  562. f = Field(sign, po, le)
  563. subs.append(f)
  564. width += le
  565. if width > insnwidth:
  566. error(lineno, 'field too large')
  567. if len(subs) == 0:
  568. if func:
  569. f = ParameterField(func)
  570. else:
  571. error(lineno, 'field with no value')
  572. else:
  573. if len(subs) == 1:
  574. f = subs[0]
  575. else:
  576. mask = 0
  577. for s in subs:
  578. if mask & s.mask:
  579. error(lineno, 'field components overlap')
  580. mask |= s.mask
  581. f = MultiField(subs, mask)
  582. if func:
  583. f = FunctionField(func, f)
  584. if name in fields:
  585. error(lineno, 'duplicate field', name)
  586. fields[name] = f
  587. # end parse_field
  588. def parse_arguments(lineno, name, toks):
  589. """Parse one argument set from TOKS at LINENO"""
  590. global arguments
  591. global re_C_ident
  592. global anyextern
  593. flds = []
  594. types = []
  595. extern = False
  596. for n in toks:
  597. if re.fullmatch('!extern', n):
  598. extern = True
  599. anyextern = True
  600. continue
  601. if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
  602. (n, t) = n.split(':')
  603. elif re.fullmatch(re_C_ident, n):
  604. t = 'int'
  605. else:
  606. error(lineno, f'invalid argument set token "{n}"')
  607. if n in flds:
  608. error(lineno, f'duplicate argument "{n}"')
  609. flds.append(n)
  610. types.append(t)
  611. if name in arguments:
  612. error(lineno, 'duplicate argument set', name)
  613. arguments[name] = Arguments(name, flds, types, extern)
  614. # end parse_arguments
  615. def lookup_field(lineno, name):
  616. global fields
  617. if name in fields:
  618. return fields[name]
  619. error(lineno, 'undefined field', name)
  620. def add_field(lineno, flds, new_name, f):
  621. if new_name in flds:
  622. error(lineno, 'duplicate field', new_name)
  623. flds[new_name] = f
  624. return flds
  625. def add_field_byname(lineno, flds, new_name, old_name):
  626. return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
  627. def infer_argument_set(flds):
  628. global arguments
  629. global decode_function
  630. for arg in arguments.values():
  631. if eq_fields_for_args(flds, arg):
  632. return arg
  633. name = decode_function + str(len(arguments))
  634. arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
  635. arguments[name] = arg
  636. return arg
  637. def infer_format(arg, fieldmask, flds, width):
  638. global arguments
  639. global formats
  640. global decode_function
  641. const_flds = {}
  642. var_flds = {}
  643. for n, c in flds.items():
  644. if c is ConstField:
  645. const_flds[n] = c
  646. else:
  647. var_flds[n] = c
  648. # Look for an existing format with the same argument set and fields
  649. for fmt in formats.values():
  650. if arg and fmt.base != arg:
  651. continue
  652. if fieldmask != fmt.fieldmask:
  653. continue
  654. if width != fmt.width:
  655. continue
  656. if not eq_fields_for_fmts(flds, fmt.fields):
  657. continue
  658. return (fmt, const_flds)
  659. name = decode_function + '_Fmt_' + str(len(formats))
  660. if not arg:
  661. arg = infer_argument_set(flds)
  662. fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
  663. formats[name] = fmt
  664. return (fmt, const_flds)
  665. # end infer_format
  666. def parse_generic(lineno, parent_pat, name, toks):
  667. """Parse one instruction format from TOKS at LINENO"""
  668. global fields
  669. global arguments
  670. global formats
  671. global allpatterns
  672. global re_arg_ident
  673. global re_fld_ident
  674. global re_fmt_ident
  675. global re_C_ident
  676. global insnwidth
  677. global insnmask
  678. global variablewidth
  679. is_format = parent_pat is None
  680. fixedmask = 0
  681. fixedbits = 0
  682. undefmask = 0
  683. width = 0
  684. flds = {}
  685. arg = None
  686. fmt = None
  687. for t in toks:
  688. # '&Foo' gives a format an explicit argument set.
  689. if re.fullmatch(re_arg_ident, t):
  690. tt = t[1:]
  691. if arg:
  692. error(lineno, 'multiple argument sets')
  693. if tt in arguments:
  694. arg = arguments[tt]
  695. else:
  696. error(lineno, 'undefined argument set', t)
  697. continue
  698. # '@Foo' gives a pattern an explicit format.
  699. if re.fullmatch(re_fmt_ident, t):
  700. tt = t[1:]
  701. if fmt:
  702. error(lineno, 'multiple formats')
  703. if tt in formats:
  704. fmt = formats[tt]
  705. else:
  706. error(lineno, 'undefined format', t)
  707. continue
  708. # '%Foo' imports a field.
  709. if re.fullmatch(re_fld_ident, t):
  710. tt = t[1:]
  711. flds = add_field_byname(lineno, flds, tt, tt)
  712. continue
  713. # 'Foo=%Bar' imports a field with a different name.
  714. if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
  715. (fname, iname) = t.split('=%')
  716. flds = add_field_byname(lineno, flds, fname, iname)
  717. continue
  718. # 'Foo=number' sets an argument field to a constant value
  719. if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
  720. (fname, value) = t.split('=')
  721. value = int(value)
  722. flds = add_field(lineno, flds, fname, ConstField(value))
  723. continue
  724. # Pattern of 0s, 1s, dots and dashes indicate required zeros,
  725. # required ones, or dont-cares.
  726. if re.fullmatch('[01.-]+', t):
  727. shift = len(t)
  728. fms = t.replace('0', '1')
  729. fms = fms.replace('.', '0')
  730. fms = fms.replace('-', '0')
  731. fbs = t.replace('.', '0')
  732. fbs = fbs.replace('-', '0')
  733. ubm = t.replace('1', '0')
  734. ubm = ubm.replace('.', '0')
  735. ubm = ubm.replace('-', '1')
  736. fms = int(fms, 2)
  737. fbs = int(fbs, 2)
  738. ubm = int(ubm, 2)
  739. fixedbits = (fixedbits << shift) | fbs
  740. fixedmask = (fixedmask << shift) | fms
  741. undefmask = (undefmask << shift) | ubm
  742. # Otherwise, fieldname:fieldwidth
  743. elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
  744. (fname, flen) = t.split(':')
  745. sign = False
  746. if flen[0] == 's':
  747. sign = True
  748. flen = flen[1:]
  749. shift = int(flen, 10)
  750. if shift + width > insnwidth:
  751. error(lineno, f'field {fname} exceeds insnwidth')
  752. f = Field(sign, insnwidth - width - shift, shift)
  753. flds = add_field(lineno, flds, fname, f)
  754. fixedbits <<= shift
  755. fixedmask <<= shift
  756. undefmask <<= shift
  757. else:
  758. error(lineno, f'invalid token "{t}"')
  759. width += shift
  760. if variablewidth and width < insnwidth and width % 8 == 0:
  761. shift = insnwidth - width
  762. fixedbits <<= shift
  763. fixedmask <<= shift
  764. undefmask <<= shift
  765. undefmask |= (1 << shift) - 1
  766. # We should have filled in all of the bits of the instruction.
  767. elif not (is_format and width == 0) and width != insnwidth:
  768. error(lineno, f'definition has {width} bits')
  769. # Do not check for fields overlapping fields; one valid usage
  770. # is to be able to duplicate fields via import.
  771. fieldmask = 0
  772. for f in flds.values():
  773. fieldmask |= f.mask
  774. # Fix up what we've parsed to match either a format or a pattern.
  775. if is_format:
  776. # Formats cannot reference formats.
  777. if fmt:
  778. error(lineno, 'format referencing format')
  779. # If an argument set is given, then there should be no fields
  780. # without a place to store it.
  781. if arg:
  782. for f in flds.keys():
  783. if f not in arg.fields:
  784. error(lineno, f'field {f} not in argument set {arg.name}')
  785. else:
  786. arg = infer_argument_set(flds)
  787. if name in formats:
  788. error(lineno, 'duplicate format name', name)
  789. fmt = Format(name, lineno, arg, fixedbits, fixedmask,
  790. undefmask, fieldmask, flds, width)
  791. formats[name] = fmt
  792. else:
  793. # Patterns can reference a format ...
  794. if fmt:
  795. # ... but not an argument simultaneously
  796. if arg:
  797. error(lineno, 'pattern specifies both format and argument set')
  798. if fixedmask & fmt.fixedmask:
  799. error(lineno, 'pattern fixed bits overlap format fixed bits')
  800. if width != fmt.width:
  801. error(lineno, 'pattern uses format of different width')
  802. fieldmask |= fmt.fieldmask
  803. fixedbits |= fmt.fixedbits
  804. fixedmask |= fmt.fixedmask
  805. undefmask |= fmt.undefmask
  806. else:
  807. (fmt, flds) = infer_format(arg, fieldmask, flds, width)
  808. arg = fmt.base
  809. for f in flds.keys():
  810. if f not in arg.fields:
  811. error(lineno, f'field {f} not in argument set {arg.name}')
  812. if f in fmt.fields.keys():
  813. error(lineno, f'field {f} set by format and pattern')
  814. for f in arg.fields:
  815. if f not in flds.keys() and f not in fmt.fields.keys():
  816. error(lineno, f'field {f} not initialized')
  817. pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
  818. undefmask, fieldmask, flds, width)
  819. parent_pat.pats.append(pat)
  820. allpatterns.append(pat)
  821. # Validate the masks that we have assembled.
  822. if fieldmask & fixedmask:
  823. error(lineno, 'fieldmask overlaps fixedmask ',
  824. f'({whex(fieldmask)} & {whex(fixedmask)})')
  825. if fieldmask & undefmask:
  826. error(lineno, 'fieldmask overlaps undefmask ',
  827. f'({whex(fieldmask)} & {whex(undefmask)})')
  828. if fixedmask & undefmask:
  829. error(lineno, 'fixedmask overlaps undefmask ',
  830. f'({whex(fixedmask)} & {whex(undefmask)})')
  831. if not is_format:
  832. allbits = fieldmask | fixedmask | undefmask
  833. if allbits != insnmask:
  834. error(lineno, 'bits left unspecified ',
  835. f'({whex(allbits ^ insnmask)})')
# end parse_generic
  837. def parse_file(f, parent_pat):
  838. """Parse all of the patterns within a file"""
  839. global re_arg_ident
  840. global re_fld_ident
  841. global re_fmt_ident
  842. global re_pat_ident
  843. # Read all of the lines of the file. Concatenate lines
  844. # ending in backslash; discard empty lines and comments.
  845. toks = []
  846. lineno = 0
  847. nesting = 0
  848. nesting_pats = []
  849. for line in f:
  850. lineno += 1
  851. # Expand and strip spaces, to find indent.
  852. line = line.rstrip()
  853. line = line.expandtabs()
  854. len1 = len(line)
  855. line = line.lstrip()
  856. len2 = len(line)
  857. # Discard comments
  858. end = line.find('#')
  859. if end >= 0:
  860. line = line[:end]
  861. t = line.split()
  862. if len(toks) != 0:
  863. # Next line after continuation
  864. toks.extend(t)
  865. else:
  866. # Allow completely blank lines.
  867. if len1 == 0:
  868. continue
  869. indent = len1 - len2
  870. # Empty line due to comment.
  871. if len(t) == 0:
  872. # Indentation must be correct, even for comment lines.
  873. if indent != nesting:
  874. error(lineno, 'indentation ', indent, ' != ', nesting)
  875. continue
  876. start_lineno = lineno
  877. toks = t
  878. # Continuation?
  879. if toks[-1] == '\\':
  880. toks.pop()
  881. continue
  882. name = toks[0]
  883. del toks[0]
  884. # End nesting?
  885. if name == '}' or name == ']':
  886. if len(toks) != 0:
  887. error(start_lineno, 'extra tokens after close brace')
  888. # Make sure { } and [ ] nest properly.
  889. if (name == '}') != isinstance(parent_pat, IncMultiPattern):
  890. error(lineno, 'mismatched close brace')
  891. try:
  892. parent_pat = nesting_pats.pop()
  893. except:
  894. error(lineno, 'extra close brace')
  895. nesting -= 2
  896. if indent != nesting:
  897. error(lineno, 'indentation ', indent, ' != ', nesting)
  898. toks = []
  899. continue
  900. # Everything else should have current indentation.
  901. if indent != nesting:
  902. error(start_lineno, 'indentation ', indent, ' != ', nesting)
  903. # Start nesting?
  904. if name == '{' or name == '[':
  905. if len(toks) != 0:
  906. error(start_lineno, 'extra tokens after open brace')
  907. if name == '{':
  908. nested_pat = IncMultiPattern(start_lineno)
  909. else:
  910. nested_pat = ExcMultiPattern(start_lineno)
  911. parent_pat.pats.append(nested_pat)
  912. nesting_pats.append(parent_pat)
  913. parent_pat = nested_pat
  914. nesting += 2
  915. toks = []
  916. continue
  917. # Determine the type of object needing to be parsed.
  918. if re.fullmatch(re_fld_ident, name):
  919. parse_field(start_lineno, name[1:], toks)
  920. elif re.fullmatch(re_arg_ident, name):
  921. parse_arguments(start_lineno, name[1:], toks)
  922. elif re.fullmatch(re_fmt_ident, name):
  923. parse_generic(start_lineno, None, name[1:], toks)
  924. elif re.fullmatch(re_pat_ident, name):
  925. parse_generic(start_lineno, parent_pat, name, toks)
  926. else:
  927. error(lineno, f'invalid token "{name}"')
  928. toks = []
  929. if nesting != 0:
  930. error(lineno, 'missing close brace')
  931. # end parse_file
  932. class SizeTree:
  933. """Class representing a node in a size decode tree"""
  934. def __init__(self, m, w):
  935. self.mask = m
  936. self.subs = []
  937. self.base = None
  938. self.width = w
  939. def str1(self, i):
  940. ind = str_indent(i)
  941. r = ind + whex(self.mask) + ' [\n'
  942. for (b, s) in self.subs:
  943. r += ind + f' {whex(b)}:\n'
  944. r += s.str1(i + 4) + '\n'
  945. r += ind + ']'
  946. return r
  947. def __str__(self):
  948. return self.str1(0)
  949. def output_code(self, i, extracted, outerbits, outermask):
  950. ind = str_indent(i)
  951. # If we need to load more bytes to test, do so now.
  952. if extracted < self.width:
  953. output(ind, f'insn = {decode_function}_load_bytes',
  954. f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
  955. extracted = self.width
  956. # Attempt to aid the compiler in producing compact switch statements.
  957. # If the bits in the mask are contiguous, extract them.
  958. sh = is_contiguous(self.mask)
  959. if sh > 0:
  960. # Propagate SH down into the local functions.
  961. def str_switch(b, sh=sh):
  962. return f'(insn >> {sh}) & {b >> sh:#x}'
  963. def str_case(b, sh=sh):
  964. return hex(b >> sh)
  965. else:
  966. def str_switch(b):
  967. return f'insn & {whexC(b)}'
  968. def str_case(b):
  969. return whexC(b)
  970. output(ind, 'switch (', str_switch(self.mask), ') {\n')
  971. for b, s in sorted(self.subs):
  972. innermask = outermask | self.mask
  973. innerbits = outerbits | b
  974. output(ind, 'case ', str_case(b), ':\n')
  975. output(ind, ' /* ',
  976. str_match_bits(innerbits, innermask), ' */\n')
  977. s.output_code(i + 4, extracted, innerbits, innermask)
  978. output(ind, '}\n')
  979. output(ind, 'return insn;\n')
  980. # end SizeTree
  981. class SizeLeaf:
  982. """Class representing a leaf node in a size decode tree"""
  983. def __init__(self, m, w):
  984. self.mask = m
  985. self.width = w
  986. def str1(self, i):
  987. return str_indent(i) + whex(self.mask)
  988. def __str__(self):
  989. return self.str1(0)
  990. def output_code(self, i, extracted, outerbits, outermask):
  991. global decode_function
  992. ind = str_indent(i)
  993. # If we need to load more bytes, do so now.
  994. if extracted < self.width:
  995. output(ind, f'insn = {decode_function}_load_bytes',
  996. f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
  997. extracted = self.width
  998. output(ind, 'return insn;\n')
  999. # end SizeLeaf
  1000. def build_size_tree(pats, width, outerbits, outermask):
  1001. global insnwidth
  1002. # Collect the mask of bits that are fixed in this width
  1003. innermask = 0xff << (insnwidth - width)
  1004. innermask &= ~outermask
  1005. minwidth = None
  1006. onewidth = True
  1007. for i in pats:
  1008. innermask &= i.fixedmask
  1009. if minwidth is None:
  1010. minwidth = i.width
  1011. elif minwidth != i.width:
  1012. onewidth = False;
  1013. if minwidth < i.width:
  1014. minwidth = i.width
  1015. if onewidth:
  1016. return SizeLeaf(innermask, minwidth)
  1017. if innermask == 0:
  1018. if width < minwidth:
  1019. return build_size_tree(pats, width + 8, outerbits, outermask)
  1020. pnames = []
  1021. for p in pats:
  1022. pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
  1023. error_with_file(pats[0].file, pats[0].lineno,
  1024. f'overlapping patterns size {width}:', pnames)
  1025. bins = {}
  1026. for i in pats:
  1027. fb = i.fixedbits & innermask
  1028. if fb in bins:
  1029. bins[fb].append(i)
  1030. else:
  1031. bins[fb] = [i]
  1032. fullmask = outermask | innermask
  1033. lens = sorted(bins.keys())
  1034. if len(lens) == 1:
  1035. b = lens[0]
  1036. return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
  1037. r = SizeTree(innermask, width)
  1038. for b, l in bins.items():
  1039. s = build_size_tree(l, width, b | outerbits, fullmask)
  1040. r.subs.append((b, s))
  1041. return r
  1042. # end build_size_tree
  1043. def prop_size(tree):
  1044. """Propagate minimum widths up the decode size tree"""
  1045. if isinstance(tree, SizeTree):
  1046. min = None
  1047. for (b, s) in tree.subs:
  1048. width = prop_size(s)
  1049. if min is None or min > width:
  1050. min = width
  1051. assert min >= tree.width
  1052. tree.width = min
  1053. else:
  1054. min = tree.width
  1055. return min
  1056. # end prop_size
  1057. def main():
  1058. global arguments
  1059. global formats
  1060. global allpatterns
  1061. global translate_scope
  1062. global translate_prefix
  1063. global output_fd
  1064. global output_file
  1065. global input_file
  1066. global insnwidth
  1067. global insntype
  1068. global insnmask
  1069. global decode_function
  1070. global bitop_width
  1071. global variablewidth
  1072. global anyextern
  1073. global testforerror
  1074. decode_scope = 'static '
  1075. long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
  1076. 'static-decode=', 'varinsnwidth=', 'test-for-error']
  1077. try:
  1078. (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
  1079. except getopt.GetoptError as err:
  1080. error(0, err)
  1081. for o, a in opts:
  1082. if o in ('-o', '--output'):
  1083. output_file = a
  1084. elif o == '--decode':
  1085. decode_function = a
  1086. decode_scope = ''
  1087. elif o == '--static-decode':
  1088. decode_function = a
  1089. elif o == '--translate':
  1090. translate_prefix = a
  1091. translate_scope = ''
  1092. elif o in ('-w', '--insnwidth', '--varinsnwidth'):
  1093. if o == '--varinsnwidth':
  1094. variablewidth = True
  1095. insnwidth = int(a)
  1096. if insnwidth == 16:
  1097. insntype = 'uint16_t'
  1098. insnmask = 0xffff
  1099. elif insnwidth == 64:
  1100. insntype = 'uint64_t'
  1101. insnmask = 0xffffffffffffffff
  1102. bitop_width = 64
  1103. elif insnwidth != 32:
  1104. error(0, 'cannot handle insns of width', insnwidth)
  1105. elif o == '--test-for-error':
  1106. testforerror = True
  1107. else:
  1108. assert False, 'unhandled option'
  1109. if len(args) < 1:
  1110. error(0, 'missing input file')
  1111. toppat = ExcMultiPattern(0)
  1112. for filename in args:
  1113. input_file = filename
  1114. f = open(filename, 'rt', encoding='utf-8')
  1115. parse_file(f, toppat)
  1116. f.close()
  1117. # We do not want to compute masks for toppat, because those masks
  1118. # are used as a starting point for build_tree. For toppat, we must
  1119. # insist that decode begins from naught.
  1120. for i in toppat.pats:
  1121. i.prop_masks()
  1122. toppat.build_tree()
  1123. toppat.prop_format()
  1124. if variablewidth:
  1125. for i in toppat.pats:
  1126. i.prop_width()
  1127. stree = build_size_tree(toppat.pats, 8, 0, 0)
  1128. prop_size(stree)
  1129. if output_file:
  1130. output_fd = open(output_file, 'wt', encoding='utf-8')
  1131. else:
  1132. output_fd = io.TextIOWrapper(sys.stdout.buffer,
  1133. encoding=sys.stdout.encoding,
  1134. errors="ignore")
  1135. output_autogen()
  1136. for n in sorted(arguments.keys()):
  1137. f = arguments[n]
  1138. f.output_def()
  1139. # A single translate function can be invoked for different patterns.
  1140. # Make sure that the argument sets are the same, and declare the
  1141. # function only once.
  1142. #
  1143. # If we're sharing formats, we're likely also sharing trans_* functions,
  1144. # but we can't tell which ones. Prevent issues from the compiler by
  1145. # suppressing redundant declaration warnings.
  1146. if anyextern:
  1147. output("#pragma GCC diagnostic push\n",
  1148. "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
  1149. "#ifdef __clang__\n"
  1150. "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
  1151. "#endif\n\n")
  1152. out_pats = {}
  1153. for i in allpatterns:
  1154. if i.name in out_pats:
  1155. p = out_pats[i.name]
  1156. if i.base.base != p.base.base:
  1157. error(0, i.name, ' has conflicting argument sets')
  1158. else:
  1159. i.output_decl()
  1160. out_pats[i.name] = i
  1161. output('\n')
  1162. if anyextern:
  1163. output("#pragma GCC diagnostic pop\n\n")
  1164. for n in sorted(formats.keys()):
  1165. f = formats[n]
  1166. f.output_extract()
  1167. output(decode_scope, 'bool ', decode_function,
  1168. '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
  1169. i4 = str_indent(4)
  1170. if len(allpatterns) != 0:
  1171. output(i4, 'union {\n')
  1172. for n in sorted(arguments.keys()):
  1173. f = arguments[n]
  1174. output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
  1175. output(i4, '} u;\n\n')
  1176. toppat.output_code(4, False, 0, 0)
  1177. output(i4, 'return false;\n')
  1178. output('}\n')
  1179. if variablewidth:
  1180. output('\n', decode_scope, insntype, ' ', decode_function,
  1181. '_load(DisasContext *ctx)\n{\n',
  1182. ' ', insntype, ' insn = 0;\n\n')
  1183. stree.output_code(4, 0, 0, 0)
  1184. output('}\n')
  1185. if output_file:
  1186. output_fd.close()
  1187. exit(1 if testforerror else 0)
  1188. # end main
  1189. if __name__ == '__main__':
  1190. main()