tcg-target.c.inc

/*
 * Tiny Code Threaded Interpreter for QEMU
 *
 * Copyright (c) 2021 Kate Temkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

// Rich disassembly is nice in theory, but it's -slow-.
//#define TCTI_GADGET_RICH_DISASSEMBLY

#define TCTI_GADGET_IMMEDIATE_ARRAY_LEN 64

// Specify the shape of the stack our runtime will use.
#define TCG_TARGET_CALL_STACK_OFFSET 0
#define TCG_TARGET_STACK_ALIGN       16

#define TCG_TARGET_CALL_ARG_I32  TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64  TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL

#include "tcg/tcg-ldst.h"

// Grab our gadget headers.
#include "tcti_gadgets.h"

/* Marker for missing code. */
#define TODO() \
    do { \
        fprintf(stderr, "TODO %s:%u: %s()\n", \
                __FILE__, __LINE__, __func__); \
        g_assert_not_reached(); \
    } while (0)

/* Enable TCTI assertions only when debugging TCG (and without NDEBUG defined).
 * Without assertions, the interpreter runs much faster. */
#if defined(CONFIG_DEBUG_TCG)
# define tcti_assert(cond) assert(cond)
#else
# define tcti_assert(cond) ((void)0)
#endif
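/*
 * Conceptual model (an illustrative sketch only -- the real gadgets are
 * pre-generated host code that tail-chains from one gadget to the next):
 * the "code" this backend emits is a thread of 64-bit gadget addresses,
 * interleaved with inline immediates where needed. A dispatcher consuming
 * that stream behaves roughly like:
 *
 *     uintptr_t *pc = translated_block;       // hypothetical stream pointer
 *     for (;;) {
 *         gadget_fn *g = (gadget_fn *)*pc++;  // fetch the next gadget address
 *         g();                                // gadget consumes its own immediates
 *     }
 *
 * The emitter helpers later in this file only ever append gadget pointers
 * (tcg_out_gadget) and raw 64-bit values (tcg_out64) to that stream.
 */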
/********************************
 *  TCG Constraints Definitions *
 ********************************/

static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
    case INDEX_op_nand_i32:
    case INDEX_op_nand_i64:
    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, r);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I3(r, r, r);

    //
    // Vector ops.
    //
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);

    case INDEX_op_st_vec:
        return C_O0_I2(w, r);

    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);

    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, w);

    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, w);

    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, w);

    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);

    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        return C_NotImplemented;
    }
}
static const int tcg_target_reg_alloc_order[] = {
    // General purpose registers, in preference-of-allocation order.
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,

    // Note: we do not allocate R14 or R15, as they're used for our
    // special-purpose values.

    // We'll use the high 16 vector registers, avoiding the call-saved lower ones.
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R0,
    TCG_REG_R1,
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS);
    return TCG_REG_R0 + slot;
}

#ifdef CONFIG_DEBUG_TCG
static const char *const tcg_target_reg_names[TCG_TARGET_GP_REGS] = {
    "r00",
    "r01",
    "r02",
    "r03",
    "r04",
    "r05",
    "r06",
    "r07",
    "r08",
    "r09",
    "r10",
    "r11",
    "r12",
    "r13",
    "r14",
    "r15",
};
#endif
/*************************
 *  TCG Emitter Helpers  *
 *************************/

/* Bitfield n...m (in 32 bit value). */
#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)

/**
 * Macro that defines a look-up tree for named QEMU_LD gadgets.
 */
#define LD_MEMOP_LOOKUP(variable, arg, suffix) \
    switch (get_memop(arg) & MO_SSIZE) { \
    case MO_UB: variable = gadget_qemu_ld_ub_ ## suffix; break; \
    case MO_SB: variable = gadget_qemu_ld_sb_ ## suffix; break; \
    case MO_UW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
    case MO_SW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
    case MO_UL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
    case MO_SL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
    case MO_UQ: variable = gadget_qemu_ld_leq_ ## suffix; break; \
    default: \
        g_assert_not_reached(); \
    }

#define LD_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
    if (a_bits >= s_bits) { \
        LD_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix); \
    } else { \
        LD_MEMOP_LOOKUP(variable, arg, unaligned_ ## suffix); \
    }

/**
 * Macro that defines a look-up tree for named QEMU_ST gadgets.
 */
#define ST_MEMOP_LOOKUP(variable, arg, suffix) \
    switch (get_memop(arg) & MO_SSIZE) { \
    case MO_UB: variable = gadget_qemu_st_ub_ ## suffix; break; \
    case MO_UW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
    case MO_UL: variable = gadget_qemu_st_leul_ ## suffix; break; \
    case MO_UQ: variable = gadget_qemu_st_leq_ ## suffix; break; \
    default: \
        g_assert_not_reached(); \
    }

#define ST_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
    if (a_bits >= s_bits) { \
        ST_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix); \
    } else { \
        ST_MEMOP_LOOKUP(variable, arg, unaligned_ ## suffix); \
    }

#define LOOKUP_SPECIAL_CASE_LDST_GADGET(arg, name, mode) \
    switch (tlb_mask_table_ofs(s, get_mmuidx(arg))) { \
    case -32: \
        gadget = (a_bits >= s_bits) ? \
            gadget_qemu_ ## name ## _aligned_ ## mode ## _off32_i64 : \
            gadget_qemu_ ## name ## _unaligned_ ## mode ## _off32_i64; \
        break; \
    case -48: \
        gadget = (a_bits >= s_bits) ? \
            gadget_qemu_ ## name ## _aligned_ ## mode ## _off48_i64 : \
            gadget_qemu_ ## name ## _unaligned_ ## mode ## _off48_i64; \
        break; \
    case -64: \
        gadget = (a_bits >= s_bits) ? \
            gadget_qemu_ ## name ## _aligned_ ## mode ## _off64_i64 : \
            gadget_qemu_ ## name ## _unaligned_ ## mode ## _off64_i64; \
        break; \
    case -96: \
        gadget = (a_bits >= s_bits) ? \
            gadget_qemu_ ## name ## _aligned_ ## mode ## _off96_i64 : \
            gadget_qemu_ ## name ## _unaligned_ ## mode ## _off96_i64; \
        break; \
    case -128: \
        gadget = (a_bits >= s_bits) ? \
            gadget_qemu_ ## name ## _aligned_ ## mode ## _off128_i64 : \
            gadget_qemu_ ## name ## _unaligned_ ## mode ## _off128_i64; \
        break; \
    default: \
        gadget = gadget_qemu_ ## name ## _slowpath_ ## mode ## _off0_i64; \
        break; \
    }
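/*
 * Worked expansion, to make the token pasting above concrete: with
 * name=ld_leq and mode=mode32, a TLB mask/table offset of -64 and an access
 * that satisfies its alignment requirement selects
 * gadget_qemu_ld_leq_aligned_mode32_off64_i64; an offset outside the
 * pre-generated set falls back to the _slowpath_..._off0_i64 variant.
 */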
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    /* tcg_out_reloc always uses the same type, addend. */
    tcg_debug_assert(type == sizeof(tcg_target_long));
    tcg_debug_assert(addend == 0);
    tcg_debug_assert(value != 0);

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_patch32(code_ptr, value);
    } else {
        tcg_patch64(code_ptr, value);
    }
    return true;
}

#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
/* Show current bytecode. Used by tcg interpreter. */
void tci_disas(uint8_t opc)
{
    const TCGOpDef *def = &tcg_op_defs[opc];
    fprintf(stderr, "TCG %s %u, %u, %u\n",
            def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs);
}
#endif

/* Write value (native size). */
static void tcg_out_immediate(TCGContext *s, tcg_target_ulong v)
{
    if (TCG_TARGET_REG_BITS == 32) {
        //tcg_out32(s, v);
        tcg_out64(s, v);
    } else {
        tcg_out64(s, v);
    }
}

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    /* Get a pointer to our immediate, which exists after a single pointer. */
    uintptr_t immediate_addr = jmp_rw;
    uintptr_t addr = tb->jmp_target_addr[n];

    /* Patch it to match our target address. */
    qatomic_set((uint64_t *)immediate_addr, addr);
}
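/*
 * Layout note: the direct-jump sequence emitted by tcg_out_goto_tb() (later
 * in this file) is
 *
 *     [ pointer to gadget_br ][ 64-bit branch-target immediate ]
 *
 * and jmp_rw above addresses the immediate slot. Retargeting a block is
 * therefore a single atomic 64-bit store; because the patched word is data
 * read by the gadget rather than host instructions, no further cache
 * maintenance is performed here.
 */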
/**
 * TCTI Thunk Helpers
 */

#ifdef CONFIG_SOFTMMU

// TODO: relocate these prototypes?
tcg_target_ulong helper_ldub_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_lduw_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr);
tcg_target_ulong helper_ldul_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr);

tcg_target_ulong helper_ldub_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr)
{
    return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_lduw_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr)
{
    return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
}

tcg_target_ulong helper_ldul_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr)
{
    return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
}

#else
#error TCTI currently only supports use of the soft MMU.
#endif
/**
 * TCTI Emitter Helpers
 */

/* Write gadget pointer. */
static void tcg_out_gadget(TCGContext *s, const void *gadget)
{
    tcg_out_immediate(s, (tcg_target_ulong)gadget);
}

/* Write gadget pointer, plus 64b immediate. */
static void tcg_out_imm64_gadget(TCGContext *s, const void *gadget, tcg_target_ulong immediate)
{
    tcg_out_gadget(s, gadget);
    tcg_out64(s, immediate);
}

/* Write gadget pointer (one register). */
static void tcg_out_unary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS], unsigned reg0)
{
    tcg_out_gadget(s, gadget_base[reg0]);
}

/* Write gadget pointer (two registers). */
static void tcg_out_binary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS], unsigned reg0, unsigned reg1)
{
    tcg_out_gadget(s, gadget_base[reg0][reg1]);
}

/* Write gadget pointer (three registers). */
static void tcg_out_ternary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS], unsigned reg0, unsigned reg1, unsigned reg2)
{
    tcg_out_gadget(s, gadget_base[reg0][reg1][reg2]);
}

/* Write gadget pointer (three registers, last is immediate value). */
static void tcg_out_ternary_immediate_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN], unsigned reg0, unsigned reg1, unsigned reg2)
{
    tcg_out_gadget(s, gadget_base[reg0][reg1][reg2]);
}
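/*
 * A minimal worked example of the encoding (register numbers are arbitrary):
 * emitting "add_i32 r8, r9, r10" via tcg_out_ternary_gadget() writes exactly
 * one 64-bit word into the instruction stream -- the address
 * gadget_add_i32[8][9][10]. The operand registers are baked into which
 * pre-generated gadget is selected, rather than being encoded as separate
 * operand bytes; only values that cannot be pre-generated (large offsets,
 * branch targets, call targets) are appended as inline 64-bit immediates.
 */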
/***************************
 *  TCG Scalar Operations  *
 ***************************/

/**
 * Version of our LDST generator that defers to more optimized gadgets selectively.
 */
static void tcg_out_ldst_gadget_inner(TCGContext *s,
        const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS],
        const void *gadget_pos_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
        const void *gadget_shifted_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
        const void *gadget_neg_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
        unsigned reg0, unsigned reg1, uint32_t offset)
{
    int64_t extended_offset = (int32_t)offset;
    bool is_negative = (extended_offset < 0);

    // Optimal case: we have a gadget that handles our specific offset, so we don't need to
    // encode an immediate. This saves us a bunch of speed. :)
    // We handle positive and negative gadgets separately, in order to allow for asymmetrical
    // collections of pre-made gadgets.
    if (!is_negative) {
        uint64_t shifted_offset = (extended_offset >> 3);
        bool aligned_to_8B = ((extended_offset & 0b111) == 0);
        bool have_optimized_gadget = (extended_offset < TCTI_GADGET_IMMEDIATE_ARRAY_LEN);
        bool have_shifted_gadget = (shifted_offset < TCTI_GADGET_IMMEDIATE_ARRAY_LEN);

        // More optimal case: we have a gadget that directly encodes the argument.
        if (have_optimized_gadget) {
            tcg_out_gadget(s, gadget_pos_imm[reg0][reg1][extended_offset]);
            return;
        }
        // Special case: it's frequent to have low-numbered positive offsets that are aligned
        // to 8B boundaries.
        else if (aligned_to_8B && have_shifted_gadget) {
            tcg_out_gadget(s, gadget_shifted_imm[reg0][reg1][shifted_offset]);
            return;
        }
    } else {
        uint64_t negated_offset = -(extended_offset);

        // More optimal case: we have a gadget that directly encodes the argument.
        if (negated_offset < TCTI_GADGET_IMMEDIATE_ARRAY_LEN) {
            tcg_out_gadget(s, gadget_neg_imm[reg0][reg1][negated_offset]);
            return;
        }
    }

    // Less optimal case: we don't have a gadget specifically for this. Emit the general-case immediate.
    tcg_out_binary_gadget(s, gadget_base, reg0, reg1);
    tcg_out64(s, extended_offset); //tcg_out32(s, offset);
}
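/*
 * Offset bucketing, by example (with TCTI_GADGET_IMMEDIATE_ARRAY_LEN == 64;
 * the register numbers are arbitrary):
 *   - offset 24:   24 < 64, so a single pointer gadget_..._imm[r0][r1][24] is emitted;
 *   - offset 256:  8-byte aligned and 256 >> 3 == 32 < 64, so the shifted table is
 *                  used: gadget_..._sh8_imm[r0][r1][32];
 *   - offset -16:  16 < 64, so the negative table gadget_..._neg_imm[r0][r1][16] is used;
 *   - offset 1000: no pre-generated gadget covers it, so the base gadget is emitted,
 *                  followed by the sign-extended offset as an inline 64-bit immediate.
 */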
/* Shorthand for the above, that prevents us from having to specify the name three times. */
#define tcg_out_ldst_gadget(s, name, a, b, c) \
    tcg_out_ldst_gadget_inner(s, name, \
        name ## _imm, \
        name ## _sh8_imm, \
        name ## _neg_imm, \
        a, b, c)

/* Write label. */
static void tcti_out_label(TCGContext *s, TCGLabel *label)
{
    if (label->has_value) {
        tcg_out64(s, label->u.value);
        tcg_debug_assert(label->u.value);
    } else {
        tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0);
        s->code_ptr += sizeof(tcg_target_ulong);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    switch (ext) {
    case TCG_TYPE_I32:
        tcg_out_binary_gadget(s, gadget_mov_i32, rd, rm);
        break;
    case TCG_TYPE_I64:
        tcg_out_binary_gadget(s, gadget_mov_i64, rd, rm);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    TCGReg w_ret = (ret - TCG_REG_V16);
    TCGReg w_arg = (arg - TCG_REG_V16);

    if (ret == arg) {
        return true;
    }

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        // If this is a GP to GP register mov, issue our standard MOV.
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        }
        // If this is a vector register to GP move, issue a UMOV.
        else if (ret < 32) {
            void *gadget = (type == TCG_TYPE_I32) ? gadget_umov_s0 : gadget_umov_d0;
            tcg_out_binary_gadget(s, gadget, ret, w_arg);
            break;
        }
        // If this is a GP to vector move, insert the value using INS.
        else if (arg < 32) {
            void *gadget = (type == TCG_TYPE_I32) ? gadget_ins_s0 : gadget_ins_d0;
            tcg_out_binary_gadget(s, gadget, w_ret, arg);
            break;
        }
        /* FALLTHRU */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_ternary_gadget(s, gadget_or_d, w_ret, w_arg, w_arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_ternary_gadget(s, gadget_or_q, w_ret, w_arg, w_arg);
        break;
    default:
        g_assert_not_reached();
    }

    return true;
}
static void tcg_out_movi_i32(TCGContext *s, TCGReg t0, tcg_target_long arg)
{
    bool is_negative = (arg < 0);

    // We handle positive and negative gadgets separately, in order to allow for asymmetrical
    // collections of pre-made gadgets.
    if (!is_negative) {
        // More optimal case: we have a gadget that directly encodes the argument.
        if (arg < ARRAY_SIZE(gadget_movi_imm_i32[t0])) {
            tcg_out_gadget(s, gadget_movi_imm_i32[t0][arg]);
            return;
        }
    }

    // Emit the mov and its immediate.
    tcg_out_unary_gadget(s, gadget_movi_i32, t0);
    tcg_out64(s, arg); // TODO: make 32b?
}

static void tcg_out_movi_i64(TCGContext *s, TCGReg t0, tcg_target_long arg)
{
    uint8_t is_negative = arg < 0;

    // We handle positive and negative gadgets separately, in order to allow for asymmetrical
    // collections of pre-made gadgets.
    if (!is_negative) {
        // More optimal case: we have a gadget that directly encodes the argument.
        if (arg < ARRAY_SIZE(gadget_movi_imm_i64[t0])) {
            tcg_out_gadget(s, gadget_movi_imm_i64[t0][arg]);
            return;
        }
    }

    // TODO: optimize the negative case, too?

    // Less optimal case: emit the mov and its immediate.
    tcg_out_unary_gadget(s, gadget_movi_i64, t0);
    tcg_out64(s, arg);
}
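/*
 * Example of the resulting streams (assuming the pre-generated immediate
 * table covers the constant): tcg_out_movi_i64(s, r, 3) emits the single
 * pointer gadget_movi_imm_i64[r][3], while tcg_out_movi_i64(s, r, 0x12345678)
 * falls back to gadget_movi_i64[r] followed by the constant as an inline
 * 64-bit immediate.
 */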
/**
 * Generate an immediate-to-register MOV.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg t0, tcg_target_long arg)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_movi_i32(s, t0, arg);
    } else {
        tcg_out_movi_i64(s, t0, arg);
    }
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
{
    switch (type) {
    case TCG_TYPE_I32:
        tcg_debug_assert(TCG_TARGET_HAS_ext8s_i32);
        tcg_out_binary_gadget(s, gadget_ext8s_i32, rd, rs);
        break;
#if TCG_TARGET_REG_BITS == 64
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_HAS_ext8s_i64);
        tcg_out_binary_gadget(s, gadget_ext8s_i64, rd, rs);
        break;
#endif
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_out_binary_gadget(s, gadget_ext8u, rd, rs);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
{
    switch (type) {
    case TCG_TYPE_I32:
        tcg_debug_assert(TCG_TARGET_HAS_ext16s_i32);
        tcg_out_binary_gadget(s, gadget_ext16s_i32, rd, rs);
        break;
#if TCG_TARGET_REG_BITS == 64
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_HAS_ext16s_i64);
        tcg_out_binary_gadget(s, gadget_ext16s_i64, rd, rs);
        break;
#endif
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_out_binary_gadget(s, gadget_ext16u, rd, rs);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_debug_assert(TCG_TARGET_HAS_ext32s_i64);
    tcg_out_binary_gadget(s, gadget_ext32s_i64, rd, rs);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_debug_assert(TCG_TARGET_HAS_ext32u_i64);
    tcg_out_binary_gadget(s, gadget_ext32u_i64, rd, rs);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_out_ext32s(s, rd, rs);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_out_ext32u(s, rd, rs);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_binary_gadget(s, gadget_extrl, rd, rs);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/**
 * Generate a CALL.
 */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
                         const TCGHelperInfo *info)
{
    tcg_out_gadget(s, gadget_call);
    tcg_out64(s, (uintptr_t)func);
}

/**
 * Generates LD instructions.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_ldst_gadget(s, gadget_ld32u, ret, arg1, arg2);
    } else {
        tcg_out_ldst_gadget(s, gadget_ld_i64, ret, arg1, arg2);
    }
}

static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
    // Emit a simple gadget with a known return code.
    tcg_out_imm64_gadget(s, gadget_exit_tb, arg);
}

static void tcg_out_goto_tb(TCGContext *s, int which)
{
    // If we're using a direct jump, we'll emit a "relocation" that can be used
    // to patch our gadget stream with the target address, later.

    // Emit our gadget.
    tcg_out_gadget(s, gadget_br);

    // Place our current instruction into our "relocation table", so it can
    // be patched once we know where the branch will target...
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);

    // ... and emit our relocation.
    tcg_out64(s, which);

    set_jmp_reset_offset(s, which);
}
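/*
 * Note the ordering above: jmp_insn_offset is recorded after the gadget_br
 * pointer has been written, so it names the 64-bit immediate slot that
 * follows it. That is the slot tb_target_set_jmp_target() later overwrites
 * atomically with the real successor address; until then it holds the
 * placeholder value `which`.
 */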
/* We expect to use a 7-bit scaled negative offset from ENV. */
#define MIN_TLB_MASK_TABLE_OFS -512

/**
 * Generate every other operation.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    switch (opc) {

    // Simple branch.
    case INDEX_op_br:
        tcg_out_gadget(s, gadget_br);
        tcti_out_label(s, arg_label(args[0]));
        break;

    // Set a register to the result of a comparison.
    // a0 = Rd, a1 = Rn, a2 = Rm
    case INDEX_op_setcond_i32:
    {
        void *gadget;

        // We have to emit a different gadget per condition; we'll select which.
        switch (args[3]) {
        case TCG_COND_EQ:  gadget = gadget_setcond_i32_eq; break;
        case TCG_COND_NE:  gadget = gadget_setcond_i32_ne; break;
        case TCG_COND_LT:  gadget = gadget_setcond_i32_lt; break;
        case TCG_COND_GE:  gadget = gadget_setcond_i32_ge; break;
        case TCG_COND_LE:  gadget = gadget_setcond_i32_le; break;
        case TCG_COND_GT:  gadget = gadget_setcond_i32_gt; break;
        case TCG_COND_LTU: gadget = gadget_setcond_i32_lo; break;
        case TCG_COND_GEU: gadget = gadget_setcond_i32_hs; break;
        case TCG_COND_LEU: gadget = gadget_setcond_i32_ls; break;
        case TCG_COND_GTU: gadget = gadget_setcond_i32_hi; break;
        default:
            g_assert_not_reached();
        }

        tcg_out_ternary_gadget(s, gadget, args[0], args[1], args[2]);
        break;
    }

    case INDEX_op_setcond_i64:
    {
        void *gadget;

        // We have to emit a different gadget per condition; we'll select which.
        switch (args[3]) {
        case TCG_COND_EQ:  gadget = gadget_setcond_i64_eq; break;
        case TCG_COND_NE:  gadget = gadget_setcond_i64_ne; break;
        case TCG_COND_LT:  gadget = gadget_setcond_i64_lt; break;
        case TCG_COND_GE:  gadget = gadget_setcond_i64_ge; break;
        case TCG_COND_LE:  gadget = gadget_setcond_i64_le; break;
        case TCG_COND_GT:  gadget = gadget_setcond_i64_gt; break;
        case TCG_COND_LTU: gadget = gadget_setcond_i64_lo; break;
        case TCG_COND_GEU: gadget = gadget_setcond_i64_hs; break;
        case TCG_COND_LEU: gadget = gadget_setcond_i64_ls; break;
        case TCG_COND_GTU: gadget = gadget_setcond_i64_hi; break;
        default:
            g_assert_not_reached();
        }

        tcg_out_ternary_gadget(s, gadget, args[0], args[1], args[2]);
        break;
    }

    /**
     * Load instructions.
     */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst_gadget(s, gadget_ld8u, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst_gadget(s, gadget_ld8s_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst_gadget(s, gadget_ld8s_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst_gadget(s, gadget_ld16u, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst_gadget(s, gadget_ld16s_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst_gadget(s, gadget_ld16s_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst_gadget(s, gadget_ld32u, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst_gadget(s, gadget_ld_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst_gadget(s, gadget_ld32s_i64, args[0], args[1], args[2]);
        break;

    /**
     * Store instructions.
     */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst_gadget(s, gadget_st8, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst_gadget(s, gadget_st16, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst_gadget(s, gadget_st_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst_gadget(s, gadget_st_i64, args[0], args[1], args[2]);
        break;

    /**
     * Arithmetic instructions.
     */
    case INDEX_op_add_i32:
        tcg_out_ternary_gadget(s, gadget_add_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_sub_i32:
        tcg_out_ternary_gadget(s, gadget_sub_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_mul_i32:
        tcg_out_ternary_gadget(s, gadget_mul_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */
        tcg_out_ternary_gadget(s, gadget_nand_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */
        tcg_out_ternary_gadget(s, gadget_nor_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_and_i32:
        tcg_out_ternary_gadget(s, gadget_and_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */
        tcg_out_ternary_gadget(s, gadget_andc_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */
        tcg_out_ternary_gadget(s, gadget_orc_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */
        tcg_out_ternary_gadget(s, gadget_eqv_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_or_i32:
        tcg_out_ternary_gadget(s, gadget_or_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_xor_i32:
        tcg_out_ternary_gadget(s, gadget_xor_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_shl_i32:
        tcg_out_ternary_gadget(s, gadget_shl_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_shr_i32:
        tcg_out_ternary_gadget(s, gadget_shr_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_sar_i32:
        tcg_out_ternary_gadget(s, gadget_sar_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
        tcg_out_ternary_gadget(s, gadget_rotr_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
        tcg_out_ternary_gadget(s, gadget_rotl_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_add_i64:
        tcg_out_ternary_gadget(s, gadget_add_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_sub_i64:
        tcg_out_ternary_gadget(s, gadget_sub_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_mul_i64:
        tcg_out_ternary_gadget(s, gadget_mul_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_and_i64:
        tcg_out_ternary_gadget(s, gadget_and_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */
        tcg_out_ternary_gadget(s, gadget_andc_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */
        tcg_out_ternary_gadget(s, gadget_orc_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */
        tcg_out_ternary_gadget(s, gadget_eqv_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */
        tcg_out_ternary_gadget(s, gadget_nand_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */
        tcg_out_ternary_gadget(s, gadget_nor_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_or_i64:
        tcg_out_ternary_gadget(s, gadget_or_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_xor_i64:
        tcg_out_ternary_gadget(s, gadget_xor_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_shl_i64:
        tcg_out_ternary_gadget(s, gadget_shl_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_shr_i64:
        tcg_out_ternary_gadget(s, gadget_shr_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_sar_i64:
        tcg_out_ternary_gadget(s, gadget_sar_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
        tcg_out_ternary_gadget(s, gadget_rotl_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
        tcg_out_ternary_gadget(s, gadget_rotr_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
        tcg_out_ternary_gadget(s, gadget_div_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
        tcg_out_ternary_gadget(s, gadget_divu_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
        tcg_out_ternary_gadget(s, gadget_rem_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
        tcg_out_ternary_gadget(s, gadget_remu_i64, args[0], args[1], args[2]);
        break;

    case INDEX_op_brcond_i64:
    {
        static uint8_t last_brcond_i64 = 0;
        void *gadget;

        // We have to emit a different gadget per condition; we'll select which.
        switch (args[2]) {
        case TCG_COND_EQ:  gadget = gadget_brcond_i64_eq; break;
        case TCG_COND_NE:  gadget = gadget_brcond_i64_ne; break;
        case TCG_COND_LT:  gadget = gadget_brcond_i64_lt; break;
        case TCG_COND_GE:  gadget = gadget_brcond_i64_ge; break;
        case TCG_COND_LE:  gadget = gadget_brcond_i64_le; break;
        case TCG_COND_GT:  gadget = gadget_brcond_i64_gt; break;
        case TCG_COND_LTU: gadget = gadget_brcond_i64_lo; break;
        case TCG_COND_GEU: gadget = gadget_brcond_i64_hs; break;
        case TCG_COND_LEU: gadget = gadget_brcond_i64_ls; break;
        case TCG_COND_GTU: gadget = gadget_brcond_i64_hi; break;
        default:
            g_assert_not_reached();
        }

        // We'll select which branch gadget to use based on a cycling counter.
        // This means we'll pick one of 16 identical brconds; spreading this out
        // helps the processor's branch prediction be less "squished", as not every
        // branch goes through the same instruction.
        tcg_out_ternary_gadget(s, gadget, last_brcond_i64, args[0], args[1]);
        last_brcond_i64 = (last_brcond_i64 + 1) % TCG_TARGET_GP_REGS;

        // Branch target immediate.
        tcti_out_label(s, arg_label(args[3]));
        break;
    }
    case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */
    case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */
        tcg_out_binary_gadget(s, gadget_bswap16, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
    case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */
        tcg_out_binary_gadget(s, gadget_bswap32, args[0], args[1]);
        break;
    case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
        tcg_out_binary_gadget(s, gadget_bswap64, args[0], args[1]);
        break;
    case INDEX_op_not_i64: /* Optional (TCG_TARGET_HAS_not_i64). */
        tcg_out_binary_gadget(s, gadget_not_i64, args[0], args[1]);
        break;
    case INDEX_op_neg_i64: /* Optional (TCG_TARGET_HAS_neg_i64). */
        tcg_out_binary_gadget(s, gadget_neg_i64, args[0], args[1]);
        break;
    case INDEX_op_clz_i64: /* Optional (TCG_TARGET_HAS_clz_i64). */
        tcg_out_ternary_gadget(s, gadget_clz_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_ctz_i64: /* Optional (TCG_TARGET_HAS_ctz_i64). */
        tcg_out_ternary_gadget(s, gadget_ctz_i64, args[0], args[1], args[2]);
        break;
    case INDEX_op_extrh_i64_i32:
        tcg_out_binary_gadget(s, gadget_extrh, args[0], args[1]);
        break;
    case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */
        tcg_out_binary_gadget(s, gadget_neg_i32, args[0], args[1]);
        break;
    case INDEX_op_clz_i32: /* Optional (TCG_TARGET_HAS_clz_i32). */
        tcg_out_ternary_gadget(s, gadget_clz_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_ctz_i32: /* Optional (TCG_TARGET_HAS_ctz_i32). */
        tcg_out_ternary_gadget(s, gadget_ctz_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */
        tcg_out_binary_gadget(s, gadget_not_i32, args[0], args[1]);
        break;
    case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
        tcg_out_ternary_gadget(s, gadget_div_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
        tcg_out_ternary_gadget(s, gadget_divu_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
        tcg_out_ternary_gadget(s, gadget_rem_i32, args[0], args[1], args[2]);
        break;
    case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
        tcg_out_ternary_gadget(s, gadget_remu_i32, args[0], args[1], args[2]);
        break;

    case INDEX_op_brcond_i32:
    {
        static uint8_t last_brcond_i32 = 0;
        void *gadget;

        // We have to emit a different gadget per condition; we'll select which.
        switch (args[2]) {
        case TCG_COND_EQ:  gadget = gadget_brcond_i32_eq; break;
        case TCG_COND_NE:  gadget = gadget_brcond_i32_ne; break;
        case TCG_COND_LT:  gadget = gadget_brcond_i32_lt; break;
        case TCG_COND_GE:  gadget = gadget_brcond_i32_ge; break;
        case TCG_COND_LE:  gadget = gadget_brcond_i32_le; break;
        case TCG_COND_GT:  gadget = gadget_brcond_i32_gt; break;
        case TCG_COND_LTU: gadget = gadget_brcond_i32_lo; break;
        case TCG_COND_GEU: gadget = gadget_brcond_i32_hs; break;
        case TCG_COND_LEU: gadget = gadget_brcond_i32_ls; break;
        case TCG_COND_GTU: gadget = gadget_brcond_i32_hi; break;
        default:
            g_assert_not_reached();
        }

        // We'll select which branch gadget to use based on a cycling counter.
        // This means we'll pick one of 16 identical brconds; spreading this out
        // helps the processor's branch prediction be less "squished", as not every
        // branch goes through the same instruction.
        tcg_out_ternary_gadget(s, gadget, last_brcond_i32, args[0], args[1]);
        last_brcond_i32 = (last_brcond_i32 + 1) % TCG_TARGET_GP_REGS;

        // Branch target immediate.
        tcti_out_label(s, arg_label(args[3]));
        break;
    }

    case INDEX_op_qemu_ld_i32:
    {
        MemOp opc = get_memop(args[2]);
        unsigned a_bits = memop_alignment_bits(opc);
        unsigned s_bits = opc & MO_SIZE;
        void *gadget;

        switch (tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
        case -32:  LD_MEMOP_HANDLER(gadget, args[2], off32_i32,  a_bits, s_bits); break;
        case -48:  LD_MEMOP_HANDLER(gadget, args[2], off48_i32,  a_bits, s_bits); break;
        case -64:  LD_MEMOP_HANDLER(gadget, args[2], off64_i32,  a_bits, s_bits); break;
        case -96:  LD_MEMOP_HANDLER(gadget, args[2], off96_i32,  a_bits, s_bits); break;
        case -128: LD_MEMOP_HANDLER(gadget, args[2], off128_i32, a_bits, s_bits); break;
        default:   LD_MEMOP_LOOKUP(gadget, args[2], slowpath_off0_i32); break;
        }

        // Args:
        // - an immediate32 encodes our operation index
        tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        tcg_out64(s, args[2]); // TODO: fix encoding to be 4b
        break;
    }

    case INDEX_op_qemu_ld_i64:
    {
        MemOp opc = get_memop(args[2]);
        unsigned a_bits = memop_alignment_bits(opc);
        unsigned s_bits = opc & MO_SIZE;
        void *gadget;

        // Special optimization case: if this is one of the common MemOpIdx values,
        // delegate to our special-case handler.
        if (args[2] == 0x02) {
            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_ub, mode02)
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        } else if (args[2] == 0x32) {
            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_leq, mode32)
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        } else if (args[2] == 0x3a) {
            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_leq, mode3a)
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        }
        // Otherwise, handle the generic case.
        else {
            switch (tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
            case -32:  LD_MEMOP_HANDLER(gadget, args[2], off32_i64,  a_bits, s_bits); break;
            case -48:  LD_MEMOP_HANDLER(gadget, args[2], off48_i64,  a_bits, s_bits); break;
            case -64:  LD_MEMOP_HANDLER(gadget, args[2], off64_i64,  a_bits, s_bits); break;
            case -96:  LD_MEMOP_HANDLER(gadget, args[2], off96_i64,  a_bits, s_bits); break;
            case -128: LD_MEMOP_HANDLER(gadget, args[2], off128_i64, a_bits, s_bits); break;
            default:   LD_MEMOP_LOOKUP(gadget, args[2], slowpath_off0_i64); break;
            }

            // Args:
            // - an immediate32 encodes our operation index
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
            tcg_out64(s, args[2]); // TODO: fix encoding to be 4b
        }
        break;
    }

    case INDEX_op_qemu_st_i32:
    {
        MemOp opc = get_memop(args[2]);
        unsigned a_bits = memop_alignment_bits(opc);
        unsigned s_bits = opc & MO_SIZE;
        void *gadget;

        switch (tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
        case -32:  ST_MEMOP_HANDLER(gadget, args[2], off32_i32,  a_bits, s_bits); break;
        case -48:  ST_MEMOP_HANDLER(gadget, args[2], off48_i32,  a_bits, s_bits); break;
        case -64:  ST_MEMOP_HANDLER(gadget, args[2], off64_i32,  a_bits, s_bits); break;
        case -96:  ST_MEMOP_HANDLER(gadget, args[2], off96_i32,  a_bits, s_bits); break;
        case -128: ST_MEMOP_HANDLER(gadget, args[2], off128_i32, a_bits, s_bits); break;
        default:   ST_MEMOP_LOOKUP(gadget, args[2], slowpath_off0_i32); break;
        }

        // Args:
        // - our gadget encodes the target and address registers
        // - an immediate32 encodes our operation index
        tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        tcg_out64(s, args[2]); // FIXME: double encoded
        break;
    }

    case INDEX_op_qemu_st_i64:
    {
        MemOp opc = get_memop(args[2]);
        unsigned a_bits = memop_alignment_bits(opc);
        unsigned s_bits = opc & MO_SIZE;
        void *gadget;

        // Special optimization case: if this is one of the common MemOpIdx values,
        // delegate to our special-case handler.
        if (args[2] == 0x02) {
            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_ub, mode02)
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        } else if (args[2] == 0x32) {
            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_leq, mode32)
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        } else if (args[2] == 0x3a) {
            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_leq, mode3a)
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
        }
        // Otherwise, handle the generic case.
        else {
            switch (tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
            case -32:  ST_MEMOP_HANDLER(gadget, args[2], off32_i64,  a_bits, s_bits); break;
            case -48:  ST_MEMOP_HANDLER(gadget, args[2], off48_i64,  a_bits, s_bits); break;
            case -64:  ST_MEMOP_HANDLER(gadget, args[2], off64_i64,  a_bits, s_bits); break;
            case -96:  ST_MEMOP_HANDLER(gadget, args[2], off96_i64,  a_bits, s_bits); break;
            case -128: ST_MEMOP_HANDLER(gadget, args[2], off128_i64, a_bits, s_bits); break;
            default:   ST_MEMOP_LOOKUP(gadget, args[2], slowpath_off0_i64); break;
            }

            // Args:
            // - our gadget encodes the target and address registers
            // - an immediate32 encodes our operation index
            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
            tcg_out64(s, args[2]); // FIXME: double encoded
        }
        break;
    }

    // Memory barriers.
    case INDEX_op_mb:
    {
        static void *sync[] = {
            [0 ... TCG_MO_ALL]            = gadget_mb_all,
            [TCG_MO_ST_ST]                = gadget_mb_st,
            [TCG_MO_LD_LD]                = gadget_mb_ld,
            [TCG_MO_LD_ST]                = gadget_mb_ld,
            [TCG_MO_LD_ST | TCG_MO_LD_LD] = gadget_mb_ld,
        };
        tcg_out_gadget(s, sync[args[0] & TCG_MO_ALL]);
        break;
    }

    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov. */
    case INDEX_op_mov_i64:
    case INDEX_op_call:      /* Always emitted via tcg_out_call. */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb. */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb. */
    case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}
/**
 * Generate stores of a register to a host memory location (base register + immediate offset).
 */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_ldst_gadget(s, gadget_st_i32, arg, arg1, arg2);
    } else {
        tcg_out_ldst_gadget(s, gadget_st_i64, arg, arg1, arg2);
    }
}
/* We never store constants directly; returning false asks the register
   allocator to materialize them into a register first. */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    return ct & TCG_CT_CONST;
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    memset(p, 0, sizeof(*p) * count);
}
/***************************
 *  TCG Vector Operations  *
 ***************************/

//
// Helper for emitting DUPI (immediate DUP) instructions.
//
#define tcg_out_dupi_gadget(s, name, q, rd, op, cmode, arg) \
    if (q) { \
        tcg_out_gadget(s, gadget_ ## name ## _cmode_ ## cmode ## _op ## op ## _q1[rd][arg]); \
    } else { \
        tcg_out_gadget(s, gadget_ ## name ## _cmode_ ## cmode ## _op ## op ## _q0[rd][arg]); \
    }
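
//
// Illustrative expansion (names taken directly from the macro above):
//
//   tcg_out_dupi_gadget(s, movi, 1, rd, 0, e, imm8)
//
// emits gadget_movi_cmode_e_op0_q1[rd][imm8]; with q == 0 it would emit the
// _q0 variant of the same table instead.
//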
//
// Helpers for emitting D/Q variant instructions.
//
#define tcg_out_dq_gadget(s, name, arity, is_q, args...) \
    if (is_q) { \
        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _q, args); \
    } else { \
        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _d, args); \
    }

#define tcg_out_unary_dq_gadget(s, name, is_q, a) \
    tcg_out_dq_gadget(s, name, unary, is_q, a)
#define tcg_out_binary_dq_gadget(s, name, is_q, a, b) \
    tcg_out_dq_gadget(s, name, binary, is_q, a, b)
#define tcg_out_ternary_dq_gadget(s, name, is_q, a, b, c) \
    tcg_out_dq_gadget(s, name, ternary, is_q, a, b, c)
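
//
// Illustrative expansion: tcg_out_binary_dq_gadget(s, ldr, is_v128, w0, r1)
// becomes tcg_out_binary_gadget(s, gadget_ldr_q, w0, r1) when is_v128 is set,
// and tcg_out_binary_gadget(s, gadget_ldr_d, w0, r1) otherwise.
//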
//
// Helper for emitting the gadget appropriate for a vector's size.
//
#define tcg_out_sized_vector_gadget(s, name, arity, vece, args...) \
    switch (vece) { \
    case MO_8: \
        if (type == TCG_TYPE_V64) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8b, args); \
        } else { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _16b, args); \
        } \
        break; \
    case MO_16: \
        if (type == TCG_TYPE_V64) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4h, args); \
        } else { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8h, args); \
        } \
        break; \
    case MO_32: \
        if (type == TCG_TYPE_V64) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2s, args); \
        } else { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4s, args); \
        } \
        break; \
    case MO_64: \
        if (type == TCG_TYPE_V128) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2d, args); \
        } else { \
            g_assert_not_reached(); \
        } \
        break; \
    default: \
        g_assert_not_reached(); \
    }
#define tcg_out_sized_vector_gadget_no64(s, name, arity, vece, args...) \
    switch (vece) { \
    case MO_8: \
        if (type == TCG_TYPE_V64) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8b, args); \
        } else { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _16b, args); \
        } \
        break; \
    case MO_16: \
        if (type == TCG_TYPE_V64) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4h, args); \
        } else { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8h, args); \
        } \
        break; \
    case MO_32: \
        if (type == TCG_TYPE_V64) { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2s, args); \
        } else { \
            tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4s, args); \
        } \
        break; \
    default: \
        g_assert_not_reached(); \
    }
#define tcg_out_unary_vector_gadget(s, name, vece, a) \
    tcg_out_sized_vector_gadget(s, name, unary, vece, a)
#define tcg_out_binary_vector_gadget(s, name, vece, a, b) \
    tcg_out_sized_vector_gadget(s, name, binary, vece, a, b)
#define tcg_out_ternary_vector_gadget(s, name, vece, a, b, c) \
    tcg_out_sized_vector_gadget(s, name, ternary, vece, a, b, c)
#define tcg_out_ternary_vector_gadget_no64(s, name, vece, a, b, c) \
    tcg_out_sized_vector_gadget_no64(s, name, ternary, vece, a, b, c)

#define tcg_out_sized_gadget_with_scalar(s, name, arity, is_scalar, vece, args...) \
    if (is_scalar) { \
        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _scalar, args); \
    } else { \
        tcg_out_sized_vector_gadget(s, name, arity, vece, args); \
    }

#define tcg_out_ternary_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
    tcg_out_sized_gadget_with_scalar(s, name, ternary, is_scalar, vece, a, b, c)
#define tcg_out_ternary_immediate_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
    tcg_out_sized_gadget_with_scalar(s, name, ternary_immediate, is_scalar, vece, a, b, c)
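
//
// The size suffixes selected above follow the AArch64 vector arrangement
// names: _8b/_16b (bytes), _4h/_8h (halfwords), _2s/_4s (words) and _2d
// (doublewords), with the narrower arrangement used for TCG_TYPE_V64 operands.
// As an illustrative expansion, tcg_out_binary_vector_gadget(s, neg, MO_32, w0, w1)
// resolves to tcg_out_binary_gadget(s, gadget_neg_2s, w0, w1) for a V64
// operation, or gadget_neg_4s for a V128 operation.
//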
/* Return true if v16 is a valid 16-bit shifted immediate. */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
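
//
// For example, 0x00ab reports cmode 0x8 / imm8 0xab (no shift), 0xab00 reports
// cmode 0xa / imm8 0xab (value shifted left by eight), and a value such as
// 0x12ab fits neither form, so the function returns false.
//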
/** Core vector operation emission. */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg r0, r1, r2, r3, w0, w1, w2, w3;

    // Typing flags for vector operations.
    bool is_v128 = (type == TCG_TYPE_V128);
    bool is_scalar = !is_v128 && (vece == MO_64);

    // Argument shortcuts.
    r0 = args[0];
    r1 = args[1];
    r2 = args[2];
    r3 = args[3];

    // Offset argument shortcuts; subtract the base vector register to convert
    // register numbers into gadget indices.
    w0 = args[0] - TCG_REG_V16;
    w1 = args[1] - TCG_REG_V16;
    w2 = args[2] - TCG_REG_V16;
    w3 = args[3] - TCG_REG_V16;

    // Argument shortcut, as signed.
    int64_t signed_offset_arg = (int32_t)args[2];

    switch (opc) {
    // Load memory -> vector: followed by a 64-bit offset immediate.
    case INDEX_op_ld_vec:
        tcg_out_binary_dq_gadget(s, ldr, is_v128, w0, r1);
        tcg_out64(s, signed_offset_arg);
        break;

    // Store vector -> memory: followed by a 64-bit offset immediate.
    case INDEX_op_st_vec:
        tcg_out_binary_dq_gadget(s, str, is_v128, w0, r1);
        tcg_out64(s, signed_offset_arg);
        break;

    // Duplicate memory to all vector elements.
    case INDEX_op_dupm_vec:
        // DUPM handles normalization itself; pass arguments raw.
        tcg_out_dupm_vec(s, type, vece, r0, r1, r2);
        break;
    case INDEX_op_add_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, add, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_sub_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, sub, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_mul_vec: // optional
        tcg_out_ternary_vector_gadget_no64(s, mul, vece, w0, w1, w2);
        break;
    case INDEX_op_neg_vec: // optional
        tcg_out_binary_vector_gadget(s, neg, vece, w0, w1);
        break;
    case INDEX_op_abs_vec: // optional
        tcg_out_binary_vector_gadget(s, abs, vece, w0, w1);
        break;
    case INDEX_op_and_vec: // optional
        tcg_out_ternary_dq_gadget(s, and, is_v128, w0, w1, w2);
        break;
    case INDEX_op_or_vec:
        tcg_out_ternary_dq_gadget(s, or, is_v128, w0, w1, w2);
        break;
    case INDEX_op_andc_vec:
        tcg_out_ternary_dq_gadget(s, andc, is_v128, w0, w1, w2);
        break;
    case INDEX_op_orc_vec: // optional
        tcg_out_ternary_dq_gadget(s, orc, is_v128, w0, w1, w2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_ternary_dq_gadget(s, xor, is_v128, w0, w1, w2);
        break;
    case INDEX_op_ssadd_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, ssadd, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_sssub_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, sssub, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_usadd_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, usadd, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_ussub_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, ussub, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_smax_vec:
        tcg_out_ternary_vector_gadget_no64(s, smax, vece, w0, w1, w2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_ternary_vector_gadget_no64(s, smin, vece, w0, w1, w2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_ternary_vector_gadget_no64(s, umax, vece, w0, w1, w2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_ternary_vector_gadget_no64(s, umin, vece, w0, w1, w2);
        break;
    case INDEX_op_not_vec: // optional
        tcg_out_binary_dq_gadget(s, not, is_v128, w0, w1);
        break;
    case INDEX_op_shlv_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, shlv, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_aa64_sshl_vec:
        tcg_out_ternary_vector_gadget_with_scalar(s, sshl, is_scalar, vece, w0, w1, w2);
        break;
    case INDEX_op_cmp_vec:
        switch (args[3]) {
        case TCG_COND_EQ:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmeq, is_scalar, vece, w0, w1, w2);
            break;
        case TCG_COND_NE:
            // NE: compute EQ, then invert the result.
            tcg_out_ternary_vector_gadget_with_scalar(s, cmeq, is_scalar, vece, w0, w1, w2);
            tcg_out_binary_dq_gadget(s, not, is_v128, w0, w0);
            break;
        case TCG_COND_GT:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmgt, is_scalar, vece, w0, w1, w2);
            break;
        case TCG_COND_LE:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmgt, is_scalar, vece, w0, w2, w1);
            break;
        case TCG_COND_GE:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmge, is_scalar, vece, w0, w1, w2);
            break;
        case TCG_COND_LT:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmge, is_scalar, vece, w0, w2, w1);
            break;
        case TCG_COND_GTU:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmhi, is_scalar, vece, w0, w1, w2);
            break;
        case TCG_COND_LEU:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmhi, is_scalar, vece, w0, w2, w1);
            break;
        case TCG_COND_GEU:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmhs, is_scalar, vece, w0, w1, w2);
            break;
        case TCG_COND_LTU:
            tcg_out_ternary_vector_gadget_with_scalar(s, cmhs, is_scalar, vece, w0, w2, w1);
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_bitsel_vec: // optional
        {
            // Select BIT/BIF/BSL depending on which source already occupies the destination.
            if (r0 == r3) {
                tcg_out_ternary_dq_gadget(s, bit, is_v128, w0, w2, w1);
            } else if (r0 == r2) {
                tcg_out_ternary_dq_gadget(s, bif, is_v128, w0, w3, w1);
            } else {
                if (r0 != r1) {
                    tcg_out_mov(s, type, r0, r1);
                }
                tcg_out_ternary_dq_gadget(s, bsl, is_v128, w0, w2, w3);
            }
            break;
        }
    /* inhibit compiler warning because we use imm as a register */
    case INDEX_op_shli_vec:
        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, shl, is_scalar, vece, w0, w1, r2);
        break;
    case INDEX_op_shri_vec:
        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, ushr, is_scalar, vece, w0, w1, r2 - 1);
        break;
    case INDEX_op_sari_vec:
        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, sshr, is_scalar, vece, w0, w1, r2 - 1);
        break;
    case INDEX_op_aa64_sli_vec:
        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, sli, is_scalar, vece, w0, w2, r3);
        break;

    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
    default:
        g_assert_not_reached();
    }
}
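
/*
 * Report which vector operations we can emit: a return of 1 means the opcode
 * is handled directly by tcg_out_vec_op() above, -1 means it can be
 * synthesized from supported opcodes via tcg_expand_vec_op() below, and 0
 * means it is unsupported.
 */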
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return vece < MO_64;
    default:
        return 0;
    }
}
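
/*
 * Expand the vector opcodes reported as -1 above in terms of operations we do
 * support. For a lane width of n bits, the rewrites below are (with negative
 * shift counts in shlv acting as right shifts on AArch64):
 *   rotli(v, i):     shift right by (n - i), then SLI the (v << i) bits back in;
 *   shrv/sarv(v, s): shlv / signed shlv by -s;
 *   rotlv(v, s):     shlv(v, s) OR shlv(v, s - n);
 *   rotrv(v, s):     shlv(v, -s) OR shlv(v, n - s).
 */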
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64. */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64. */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}
/* Generate DUPI (move immediate) vector ops. */
static bool tcg_out_optimized_dupi_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, int64_t v64)
{
    bool q = (type == TCG_TYPE_V128);
    int cmode, imm8, i;

    // If we're copying an 8-bit immediate, we always have a dedicated gadget for it,
    // since there are only 256 possible values * 16 registers. Emit a MOVI gadget directly.
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_dupi_gadget(s, movi, q, rd, 0, e, imm8);
        return true;
    }

    // Otherwise, if we have a value made up entirely of 0x00 and 0xFF bytes,
    // we can use the scalar variant of MOVI (op=1, cmode=e), which handles
    // that case directly.
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_dupi_gadget(s, movi, q, rd, 1, e, imm8);
    return true;

fail_bytes:
    // Handle 16-bit element moves.
    if (vece == MO_16) {
        uint16_t v16 = v64;

        // Check whether the value is representable as a MOVI imm8, possibly via a shift.
        if (is_shimm16(v16, &cmode, &imm8)) {
            // Output the correct instruction cmode: either a plain MOVI (8) or an LSL #8 MOVI (a).
            if (cmode == 0x8) {
                tcg_out_dupi_gadget(s, movi, q, rd, 0, 8, imm8);
            } else {
                tcg_out_dupi_gadget(s, movi, q, rd, 0, a, imm8);
            }
            return true;
        }

        // Check whether the value is representable as an inverted MOVI imm8, possibly via a shift.
        if (is_shimm16(~v16, &cmode, &imm8)) {
            // Output the correct instruction cmode: either a plain MVNI (8) or an LSL #8 MVNI (a).
            if (cmode == 0x8) {
                tcg_out_dupi_gadget(s, mvni, q, rd, 0, 8, imm8);
            } else {
                tcg_out_dupi_gadget(s, mvni, q, rd, 0, a, imm8);
            }
            return true;
        }

        // If neither optimization applies, we'll need to do this in two steps.
        // Normally we'd emit a single gadget for the combined operation, but that
        // would require far too many gadgets; emit two instead.
        tcg_out_dupi_gadget(s, movi, q, rd, 0, 8, v16 & 0xff);
        tcg_out_dupi_gadget(s, orr, q, rd, 0, a, v16 >> 8);
        return true;
    }
    // FIXME: implement 32-bit move optimizations
    // Try to create optimized 32-bit moves.
    //else if (vece == MO_32) {
    //    uint32_t v32 = v64;
    //    uint32_t n32 = ~v32;
    //
    //    if (is_shimm32(v32, &cmode, &imm8) ||
    //        is_soimm32(v32, &cmode, &imm8) ||
    //        is_fimm32(v32, &cmode, &imm8)) {
    //        tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
    //        return;
    //    }
    //    if (is_shimm32(n32, &cmode, &imm8) ||
    //        is_soimm32(n32, &cmode, &imm8)) {
    //        tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
    //        return;
    //    }
    //
    //    //
    //    // Restrict the set of constants to those we can load with
    //    // two instructions. Others we load from the pool.
    //    //
    //    i = is_shimm32_pair(v32, &cmode, &imm8);
    //    if (i) {
    //        tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
    //        tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
    //        return;
    //    }
    //    i = is_shimm32_pair(n32, &cmode, &imm8);
    //    if (i) {
    //        tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
    //        tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
    //        return;
    //    }
    //}

    return false;
}
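
//
// Worked example for the optimized path above (constants chosen purely for
// illustration): duplicating the 16-bit value 0x1200 hits is_shimm16(), which
// reports cmode 0xa / imm8 0x12, so we emit the shifted-by-8 MOVI gadget; the
// value 0x1234 matches neither shifted form (nor its inverse), so we fall back
// to the two-step MOVI 0x34 followed by ORR of (0x12 << 8).
//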
/* Emits instructions that can load an immediate into a vector. */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, int64_t v64)
{
    // Convert Rd into a simple gadget number.
    rd = rd - TCG_REG_V16;

    // First, try to emit an optimized implementation, if possible.
    if (tcg_out_optimized_dupi_vec(s, type, vece, rd, v64)) {
        return;
    }

    // If we couldn't, we'll need to load the full vector from memory.
    // Emit it into our bytecode stream as an immediate, which we'll then
    // load inside the gadget.
    if (type == TCG_TYPE_V128) {
        tcg_out_unary_gadget(s, gadget_ldi_q, rd);
        tcg_out64(s, v64);
        tcg_out64(s, v64);
    } else {
        tcg_out_unary_gadget(s, gadget_ldi_d, rd);
        tcg_out64(s, v64);
    }
}
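
// Note that the fallback above lays the vector's contents directly into the
// bytecode stream: a gadget pointer followed by one 64-bit word for a V64
// value, or the same word twice for a V128 value, which the ldi gadget then
// loads into the destination register.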
/* Emits instructions that can load a register into a vector. */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, TCGReg rs)
{
    // Compute the gadget index for the relevant vector register.
    TCGReg wd = rd - TCG_REG_V16;

    // Emit a DUP gadget to handle the operation.
    tcg_out_binary_vector_gadget(s, dup, vece, wd, rs);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg base, intptr_t offset)
{
    int64_t extended_offset = (int32_t)offset;

    // Convert the register into a simple register number for our gadgets.
    r = r - TCG_REG_V16;

    // Emit a DUPM gadget...
    tcg_out_binary_vector_gadget(s, dupm, vece, r, base);

    // ... and emit its int64 immediate offset.
    tcg_out64(s, extended_offset);
    return true;
}
/********************************
 *  TCG Runtime & Platform Def  *
 ********************************/

static void tcg_target_init(TCGContext *s)
{
    /* The current code uses uint8_t for tcg operations. */
    tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX);

    // Registers available for each type of operation.
    tcg_target_available_regs[TCG_TYPE_I32]  = TCG_MASK_GP_REGISTERS;
    tcg_target_available_regs[TCG_TYPE_I64]  = TCG_MASK_GP_REGISTERS;
    tcg_target_available_regs[TCG_TYPE_V64]  = TCG_MASK_VECTOR_REGISTERS;
    tcg_target_available_regs[TCG_TYPE_V128] = TCG_MASK_VECTOR_REGISTERS;

    TCGReg unclobbered_registers[] = {
        // We don't use registers R16+ in our runtime, so we needn't bother protecting them.
        TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
        TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
        TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
        TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,

        // Per our calling convention.
        TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
        TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
    };

    // Specify which registers are clobbered during a call.
    tcg_target_call_clobber_regs = -1ull;
    for (unsigned i = 0; i < ARRAY_SIZE(unclobbered_registers); ++i) {
        tcg_regset_reset_reg(tcg_target_call_clobber_regs, unclobbered_registers[i]);
    }

    // Specify which local registers we're reserving.
    //
    // Note that we only have to specify registers that are used in the runtime,
    // and not e.g. the register that contains AREG0, which can never be allocated.
    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    /* We use negative offsets from "sp" so that we can distinguish
       stores that might pretend to be call arguments. */
    tcg_set_frame(s, TCG_REG_CALL_STACK, -CPU_TEMP_BUF_NLONGS * sizeof(long),
                  CPU_TEMP_BUF_NLONGS * sizeof(long));
}
/* Generate global QEMU prologue and epilogue code. */
static inline void tcg_target_qemu_prologue(TCGContext *s)
{
    // No prologue is needed, as we're interpreted.
}

static void tcg_out_tb_start(TCGContext *s)
{
    /* nothing to do */
}

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return true;
}
/**
 * TCTI 'interpreter' bootstrap.
 */

// Store the current return address during helper calls.
__thread uintptr_t tcti_call_return_address;

/* Dispatch the bytecode stream contained in our translation buffer. */
uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_ptr)
{
    // Create our per-CPU temporary storage.
    long tcg_temps[CPU_TEMP_BUF_NLONGS];

    uint64_t return_value = 0;
    uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
    uintptr_t pc_mirror = (uintptr_t)&tcti_call_return_address;

    // Ensure our target configuration hasn't changed.
    tcti_assert(TCG_AREG0 == TCG_REG_R14);
    tcti_assert(TCG_REG_CALL_STACK == TCG_REG_R15);

    asm(
        // Our threaded-dispatch prologue needs to set things up for our machine to run.
        // This means:
        // - Set up TCG_AREG0 (R14) to point to our architectural state.
        // - Set up TCG_REG_CALL_STACK (R15) to point to our temporary buffer.
        // - Point x25 at tcti_call_return_address, our helper return-address mirror.
        // - Point x28 (our bytecode "instruction pointer") to the relevant stream address.
        "ldr x14, %[areg0]\n"
        "ldr x15, %[sp_value]\n"
        "ldr x25, %[pc_mirror]\n"
        "ldr x28, %[start_tb_ptr]\n"

        // To start our code, we'll -call- the gadget at the first bytecode pointer.
        // Note that we call/branch-with-link here, so our TB_EXIT gadget can RET in order
        // to return to this point when things are complete.
        "ldr x27, [x28], #8\n"
        "blr x27\n"

        // Finally, we'll copy out our final return value.
        "str x0, %[return_value]\n"

        : [return_value] "=m" (return_value)
        : [areg0] "m" (env),
          [sp_value] "m" (sp_value),
          [start_tb_ptr] "m" (v_tb_ptr),
          [pc_mirror] "m" (pc_mirror)

        // We touch _every_ one of the lower registers, as we use these to execute directly.
        : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
          "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",

          // We also use x25 for the return-address mirror, x26/x27 for temporary values,
          // and x28 as our bytecode pointer.
          "x25", "x26", "x27", "x28", "cc", "memory"
    );

    return return_value;
}
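
// A sketch of the threaded dispatch the prologue above kicks off (illustrative
// only; the actual gadget bodies are generated elsewhere): each gadget is
// expected to finish by fetching the next bytecode entry through x28 and
// branching to it, mirroring the "ldr x27, [x28], #8" + branch sequence used
// above, until the TB_EXIT gadget finally RETs back to the "blr" in this
// function.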
/**
 * Disassembly output support.
 */

#include <dlfcn.h>

/* Disassemble TCTI bytecode. */
int print_insn_tcti(bfd_vma addr, disassemble_info *info)
{
#ifdef TCTI_GADGET_RICH_DISASSEMBLY
    Dl_info symbol_info = {};
    char symbol_name[48];
#endif

    int status;
    uint64_t block;

    // Read the relevant pointer.
    status = info->read_memory_func(addr, (void *)&block, sizeof(block), info);
    if (status != 0) {
        info->memory_error_func(status, addr, info);
        return -1;
    }

#ifdef TCTI_GADGET_RICH_DISASSEMBLY
    // Most of our disassembly stream will be gadgets. Try to get their names, for nice output.
    dladdr((void *)block, &symbol_info);
    if (symbol_info.dli_sname != 0) {
        strncpy(symbol_name, symbol_info.dli_sname, sizeof(symbol_name));
        symbol_name[sizeof(symbol_name) - 1] = 0;
        info->fprintf_func(info->stream, "%s", symbol_name);
    } else {
        info->fprintf_func(info->stream, "%016lx", block);
    }
#else
    info->fprintf_func(info->stream, "%016lx", block);
#endif

    return sizeof(block);
}
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    g_assert_not_reached();
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    g_assert_not_reached();
}