optimize.c

  1. /*
  2. * Optimizations for Tiny Code Generator for QEMU
  3. *
  4. * Copyright (c) 2010 Samsung Electronics.
  5. * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "qemu/int128.h"
  27. #include "tcg/tcg-op.h"
  28. #include "tcg-internal.h"
  29. #define CASE_OP_32_64(x) \
  30. glue(glue(case INDEX_op_, x), _i32): \
  31. glue(glue(case INDEX_op_, x), _i64)
  32. #define CASE_OP_32_64_VEC(x) \
  33. glue(glue(case INDEX_op_, x), _i32): \
  34. glue(glue(case INDEX_op_, x), _i64): \
  35. glue(glue(case INDEX_op_, x), _vec)
  36. typedef struct TempOptInfo {
  37. bool is_const;
  38. TCGTemp *prev_copy;
  39. TCGTemp *next_copy;
  40. uint64_t val;
  41. uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
  42. uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
  43. } TempOptInfo;
  44. typedef struct OptContext {
  45. TCGContext *tcg;
  46. TCGOp *prev_mb;
  47. TCGTempSet temps_used;
  48. /* In flight values from optimization. */
  49. uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
  50. uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
  51. uint64_t s_mask; /* mask of clrsb(value) bits */
  52. TCGType type;
  53. } OptContext;
  54. /* Calculate the smask for a specific value. */
  55. static uint64_t smask_from_value(uint64_t value)
  56. {
  57. int rep = clrsb64(value);
  58. return ~(~0ull >> rep);
  59. }
  60. /*
  61. * Calculate the smask for a given set of known-zeros.
  62. * If there are lots of zeros on the left, we can consider the remainder
  63. * an unsigned field, and thus the corresponding signed field is one bit
  64. * larger.
  65. */
  66. static uint64_t smask_from_zmask(uint64_t zmask)
  67. {
  68. /*
  69. * Only the 0 bits are significant for zmask, thus the msb itself
  70. * must be zero, else we have no sign information.
  71. */
  72. int rep = clz64(zmask);
  73. if (rep == 0) {
  74. return 0;
  75. }
  76. rep -= 1;
  77. return ~(~0ull >> rep);
  78. }
  79. /*
  80. * Recreate a properly left-aligned smask after manipulation.
  81. * Some bit-shuffling, particularly shifts and rotates, may
  82. * retain sign bits on the left, but may scatter disconnected
  83. * sign bits on the right. Retain only what remains to the left.
  84. */
  85. static uint64_t smask_from_smask(int64_t smask)
  86. {
  87. /* Only the 1 bits are significant for smask */
  88. return smask_from_zmask(~smask);
  89. }
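/*
 * Illustrative sketch, not part of optimize.c: what the two masks look like
 * for one sample constant. demo_smask_from_value() is a hypothetical
 * stand-in that assumes clrsb64() matches the GCC builtin __builtin_clrsbll.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t demo_smask_from_value(uint64_t value)
{
    int rep = __builtin_clrsbll(value);   /* redundant copies of the sign bit */
    return ~(~0ull >> rep);
}

int main(void)
{
    uint64_t v = 0xff;
    /* Bits 62..8 repeat the (zero) sign bit: 55 redundant sign bits. */
    assert(__builtin_clrsbll(v) == 55);
    /* s_mask is left-aligned: only the top 55 bits are set. */
    assert(demo_smask_from_value(v) == 0xfffffffffffffe00ull);
    /* For a constant, z_mask is the value itself: bits 63..8 are known zero. */
    assert((v & ~(uint64_t)0xff) == 0);
    return 0;
}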
  90. static inline TempOptInfo *ts_info(TCGTemp *ts)
  91. {
  92. return ts->state_ptr;
  93. }
  94. static inline TempOptInfo *arg_info(TCGArg arg)
  95. {
  96. return ts_info(arg_temp(arg));
  97. }
  98. static inline bool ts_is_const(TCGTemp *ts)
  99. {
  100. return ts_info(ts)->is_const;
  101. }
  102. static inline bool arg_is_const(TCGArg arg)
  103. {
  104. return ts_is_const(arg_temp(arg));
  105. }
  106. static inline bool ts_is_copy(TCGTemp *ts)
  107. {
  108. return ts_info(ts)->next_copy != ts;
  109. }
  110. /* Reset TEMP's state, possibly removing the temp from the list of copies. */
  111. static void reset_ts(TCGTemp *ts)
  112. {
  113. TempOptInfo *ti = ts_info(ts);
  114. TempOptInfo *pi = ts_info(ti->prev_copy);
  115. TempOptInfo *ni = ts_info(ti->next_copy);
  116. ni->prev_copy = ti->prev_copy;
  117. pi->next_copy = ti->next_copy;
  118. ti->next_copy = ts;
  119. ti->prev_copy = ts;
  120. ti->is_const = false;
  121. ti->z_mask = -1;
  122. ti->s_mask = 0;
  123. }
  124. static void reset_temp(TCGArg arg)
  125. {
  126. reset_ts(arg_temp(arg));
  127. }
  128. /* Initialize and activate a temporary. */
  129. static void init_ts_info(OptContext *ctx, TCGTemp *ts)
  130. {
  131. size_t idx = temp_idx(ts);
  132. TempOptInfo *ti;
  133. if (test_bit(idx, ctx->temps_used.l)) {
  134. return;
  135. }
  136. set_bit(idx, ctx->temps_used.l);
  137. ti = ts->state_ptr;
  138. if (ti == NULL) {
  139. ti = tcg_malloc(sizeof(TempOptInfo));
  140. ts->state_ptr = ti;
  141. }
  142. ti->next_copy = ts;
  143. ti->prev_copy = ts;
  144. if (ts->kind == TEMP_CONST) {
  145. ti->is_const = true;
  146. ti->val = ts->val;
  147. ti->z_mask = ts->val;
  148. ti->s_mask = smask_from_value(ts->val);
  149. } else {
  150. ti->is_const = false;
  151. ti->z_mask = -1;
  152. ti->s_mask = 0;
  153. }
  154. }
  155. static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
  156. {
  157. TCGTemp *i, *g, *l;
  158. /* If this is already readonly, we can't do better. */
  159. if (temp_readonly(ts)) {
  160. return ts;
  161. }
  162. g = l = NULL;
  163. for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
  164. if (temp_readonly(i)) {
  165. return i;
  166. } else if (i->kind > ts->kind) {
  167. if (i->kind == TEMP_GLOBAL) {
  168. g = i;
  169. } else if (i->kind == TEMP_TB) {
  170. l = i;
  171. }
  172. }
  173. }
  174. /* If we didn't find a better representation, return the same temp. */
  175. return g ? g : l ? l : ts;
  176. }
  177. static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
  178. {
  179. TCGTemp *i;
  180. if (ts1 == ts2) {
  181. return true;
  182. }
  183. if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
  184. return false;
  185. }
  186. for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
  187. if (i == ts2) {
  188. return true;
  189. }
  190. }
  191. return false;
  192. }
  193. static bool args_are_copies(TCGArg arg1, TCGArg arg2)
  194. {
  195. return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
  196. }
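/*
 * Illustrative sketch, not part of optimize.c: copies are tracked as a
 * circular doubly-linked list threaded through next_copy/prev_copy, so a
 * temp that points at itself has no known copies. DemoTemp and the demo_*
 * helpers below are hypothetical stand-ins for TCGTemp/TempOptInfo.
 */
#include <assert.h>
#include <stdbool.h>

typedef struct DemoTemp {
    struct DemoTemp *prev_copy, *next_copy;
} DemoTemp;

static void demo_init(DemoTemp *t) { t->prev_copy = t->next_copy = t; }
static bool demo_is_copy(DemoTemp *t) { return t->next_copy != t; }

/* Link dst into src's copy class, as tcg_opt_gen_mov does. */
static void demo_link(DemoTemp *dst, DemoTemp *src)
{
    dst->next_copy = src->next_copy;
    dst->prev_copy = src;
    src->next_copy->prev_copy = dst;
    src->next_copy = dst;
}

/* Unlink t from its class, as reset_ts does. */
static void demo_reset(DemoTemp *t)
{
    t->next_copy->prev_copy = t->prev_copy;
    t->prev_copy->next_copy = t->next_copy;
    demo_init(t);
}

int main(void)
{
    DemoTemp a, b;
    demo_init(&a);
    demo_init(&b);
    assert(!demo_is_copy(&a));
    demo_link(&b, &a);                  /* after "mov b, a"            */
    assert(demo_is_copy(&a) && demo_is_copy(&b));
    demo_reset(&b);                     /* b is overwritten, drops out */
    assert(!demo_is_copy(&a) && !demo_is_copy(&b));
    return 0;
}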
  197. static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
  198. {
  199. TCGTemp *dst_ts = arg_temp(dst);
  200. TCGTemp *src_ts = arg_temp(src);
  201. TempOptInfo *di;
  202. TempOptInfo *si;
  203. TCGOpcode new_op;
  204. if (ts_are_copies(dst_ts, src_ts)) {
  205. tcg_op_remove(ctx->tcg, op);
  206. return true;
  207. }
  208. reset_ts(dst_ts);
  209. di = ts_info(dst_ts);
  210. si = ts_info(src_ts);
  211. switch (ctx->type) {
  212. case TCG_TYPE_I32:
  213. new_op = INDEX_op_mov_i32;
  214. break;
  215. case TCG_TYPE_I64:
  216. new_op = INDEX_op_mov_i64;
  217. break;
  218. case TCG_TYPE_V64:
  219. case TCG_TYPE_V128:
  220. case TCG_TYPE_V256:
  221. /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
  222. new_op = INDEX_op_mov_vec;
  223. break;
  224. default:
  225. g_assert_not_reached();
  226. }
  227. op->opc = new_op;
  228. op->args[0] = dst;
  229. op->args[1] = src;
  230. di->z_mask = si->z_mask;
  231. di->s_mask = si->s_mask;
  232. if (src_ts->type == dst_ts->type) {
  233. TempOptInfo *ni = ts_info(si->next_copy);
  234. di->next_copy = si->next_copy;
  235. di->prev_copy = src_ts;
  236. ni->prev_copy = dst_ts;
  237. si->next_copy = dst_ts;
  238. di->is_const = si->is_const;
  239. di->val = si->val;
  240. }
  241. return true;
  242. }
  243. static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
  244. TCGArg dst, uint64_t val)
  245. {
  246. TCGTemp *tv;
  247. if (ctx->type == TCG_TYPE_I32) {
  248. val = (int32_t)val;
  249. }
  250. /* Convert movi to mov with constant temp. */
  251. tv = tcg_constant_internal(ctx->type, val);
  252. init_ts_info(ctx, tv);
  253. return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
  254. }
  255. static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
  256. {
  257. uint64_t l64, h64;
  258. switch (op) {
  259. CASE_OP_32_64(add):
  260. return x + y;
  261. CASE_OP_32_64(sub):
  262. return x - y;
  263. CASE_OP_32_64(mul):
  264. return x * y;
  265. CASE_OP_32_64_VEC(and):
  266. return x & y;
  267. CASE_OP_32_64_VEC(or):
  268. return x | y;
  269. CASE_OP_32_64_VEC(xor):
  270. return x ^ y;
  271. case INDEX_op_shl_i32:
  272. return (uint32_t)x << (y & 31);
  273. case INDEX_op_shl_i64:
  274. return (uint64_t)x << (y & 63);
  275. case INDEX_op_shr_i32:
  276. return (uint32_t)x >> (y & 31);
  277. case INDEX_op_shr_i64:
  278. return (uint64_t)x >> (y & 63);
  279. case INDEX_op_sar_i32:
  280. return (int32_t)x >> (y & 31);
  281. case INDEX_op_sar_i64:
  282. return (int64_t)x >> (y & 63);
  283. case INDEX_op_rotr_i32:
  284. return ror32(x, y & 31);
  285. case INDEX_op_rotr_i64:
  286. return ror64(x, y & 63);
  287. case INDEX_op_rotl_i32:
  288. return rol32(x, y & 31);
  289. case INDEX_op_rotl_i64:
  290. return rol64(x, y & 63);
  291. CASE_OP_32_64_VEC(not):
  292. return ~x;
  293. CASE_OP_32_64(neg):
  294. return -x;
  295. CASE_OP_32_64_VEC(andc):
  296. return x & ~y;
  297. CASE_OP_32_64_VEC(orc):
  298. return x | ~y;
  299. CASE_OP_32_64_VEC(eqv):
  300. return ~(x ^ y);
  301. CASE_OP_32_64_VEC(nand):
  302. return ~(x & y);
  303. CASE_OP_32_64_VEC(nor):
  304. return ~(x | y);
  305. case INDEX_op_clz_i32:
  306. return (uint32_t)x ? clz32(x) : y;
  307. case INDEX_op_clz_i64:
  308. return x ? clz64(x) : y;
  309. case INDEX_op_ctz_i32:
  310. return (uint32_t)x ? ctz32(x) : y;
  311. case INDEX_op_ctz_i64:
  312. return x ? ctz64(x) : y;
  313. case INDEX_op_ctpop_i32:
  314. return ctpop32(x);
  315. case INDEX_op_ctpop_i64:
  316. return ctpop64(x);
  317. CASE_OP_32_64(ext8s):
  318. return (int8_t)x;
  319. CASE_OP_32_64(ext16s):
  320. return (int16_t)x;
  321. CASE_OP_32_64(ext8u):
  322. return (uint8_t)x;
  323. CASE_OP_32_64(ext16u):
  324. return (uint16_t)x;
  325. CASE_OP_32_64(bswap16):
  326. x = bswap16(x);
  327. return y & TCG_BSWAP_OS ? (int16_t)x : x;
  328. CASE_OP_32_64(bswap32):
  329. x = bswap32(x);
  330. return y & TCG_BSWAP_OS ? (int32_t)x : x;
  331. case INDEX_op_bswap64_i64:
  332. return bswap64(x);
  333. case INDEX_op_ext_i32_i64:
  334. case INDEX_op_ext32s_i64:
  335. return (int32_t)x;
  336. case INDEX_op_extu_i32_i64:
  337. case INDEX_op_extrl_i64_i32:
  338. case INDEX_op_ext32u_i64:
  339. return (uint32_t)x;
  340. case INDEX_op_extrh_i64_i32:
  341. return (uint64_t)x >> 32;
  342. case INDEX_op_muluh_i32:
  343. return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
  344. case INDEX_op_mulsh_i32:
  345. return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
  346. case INDEX_op_muluh_i64:
  347. mulu64(&l64, &h64, x, y);
  348. return h64;
  349. case INDEX_op_mulsh_i64:
  350. muls64(&l64, &h64, x, y);
  351. return h64;
  352. case INDEX_op_div_i32:
  353. /* Avoid crashing on divide by zero, otherwise undefined. */
  354. return (int32_t)x / ((int32_t)y ? : 1);
  355. case INDEX_op_divu_i32:
  356. return (uint32_t)x / ((uint32_t)y ? : 1);
  357. case INDEX_op_div_i64:
  358. return (int64_t)x / ((int64_t)y ? : 1);
  359. case INDEX_op_divu_i64:
  360. return (uint64_t)x / ((uint64_t)y ? : 1);
  361. case INDEX_op_rem_i32:
  362. return (int32_t)x % ((int32_t)y ? : 1);
  363. case INDEX_op_remu_i32:
  364. return (uint32_t)x % ((uint32_t)y ? : 1);
  365. case INDEX_op_rem_i64:
  366. return (int64_t)x % ((int64_t)y ? : 1);
  367. case INDEX_op_remu_i64:
  368. return (uint64_t)x % ((uint64_t)y ? : 1);
  369. default:
  370. fprintf(stderr,
  371. "Unrecognized operation %d in do_constant_folding.\n", op);
  372. tcg_abort();
  373. }
  374. }
  375. static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
  376. uint64_t x, uint64_t y)
  377. {
  378. uint64_t res = do_constant_folding_2(op, x, y);
  379. if (type == TCG_TYPE_I32) {
  380. res = (int32_t)res;
  381. }
  382. return res;
  383. }
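/*
 * Illustrative sketch, not part of optimize.c: the TCG_TYPE_I32 adjustment
 * above stores a folded 32-bit result sign-extended into 64 bits, matching
 * how 32-bit constants are represented elsewhere in TCG.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t x = 0x7fffffff, y = 1;
    uint64_t res = x + y;       /* raw 64-bit fold of an add_i32 */
    res = (int32_t)res;         /* truncate and sign-extend for TCG_TYPE_I32 */
    assert(res == 0xffffffff80000000ull);
    return 0;
}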
  384. static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
  385. {
  386. switch (c) {
  387. case TCG_COND_EQ:
  388. return x == y;
  389. case TCG_COND_NE:
  390. return x != y;
  391. case TCG_COND_LT:
  392. return (int32_t)x < (int32_t)y;
  393. case TCG_COND_GE:
  394. return (int32_t)x >= (int32_t)y;
  395. case TCG_COND_LE:
  396. return (int32_t)x <= (int32_t)y;
  397. case TCG_COND_GT:
  398. return (int32_t)x > (int32_t)y;
  399. case TCG_COND_LTU:
  400. return x < y;
  401. case TCG_COND_GEU:
  402. return x >= y;
  403. case TCG_COND_LEU:
  404. return x <= y;
  405. case TCG_COND_GTU:
  406. return x > y;
  407. default:
  408. tcg_abort();
  409. }
  410. }
  411. static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
  412. {
  413. switch (c) {
  414. case TCG_COND_EQ:
  415. return x == y;
  416. case TCG_COND_NE:
  417. return x != y;
  418. case TCG_COND_LT:
  419. return (int64_t)x < (int64_t)y;
  420. case TCG_COND_GE:
  421. return (int64_t)x >= (int64_t)y;
  422. case TCG_COND_LE:
  423. return (int64_t)x <= (int64_t)y;
  424. case TCG_COND_GT:
  425. return (int64_t)x > (int64_t)y;
  426. case TCG_COND_LTU:
  427. return x < y;
  428. case TCG_COND_GEU:
  429. return x >= y;
  430. case TCG_COND_LEU:
  431. return x <= y;
  432. case TCG_COND_GTU:
  433. return x > y;
  434. default:
  435. tcg_abort();
  436. }
  437. }
  438. static bool do_constant_folding_cond_eq(TCGCond c)
  439. {
  440. switch (c) {
  441. case TCG_COND_GT:
  442. case TCG_COND_LTU:
  443. case TCG_COND_LT:
  444. case TCG_COND_GTU:
  445. case TCG_COND_NE:
  446. return 0;
  447. case TCG_COND_GE:
  448. case TCG_COND_GEU:
  449. case TCG_COND_LE:
  450. case TCG_COND_LEU:
  451. case TCG_COND_EQ:
  452. return 1;
  453. default:
  454. tcg_abort();
  455. }
  456. }
  457. /*
  458. * Return -1 if the condition can't be simplified,
  459. * and the result of the condition (0 or 1) if it can.
  460. */
  461. static int do_constant_folding_cond(TCGType type, TCGArg x,
  462. TCGArg y, TCGCond c)
  463. {
  464. if (arg_is_const(x) && arg_is_const(y)) {
  465. uint64_t xv = arg_info(x)->val;
  466. uint64_t yv = arg_info(y)->val;
  467. switch (type) {
  468. case TCG_TYPE_I32:
  469. return do_constant_folding_cond_32(xv, yv, c);
  470. case TCG_TYPE_I64:
  471. return do_constant_folding_cond_64(xv, yv, c);
  472. default:
  473. /* Only scalar comparisons are optimizable */
  474. return -1;
  475. }
  476. } else if (args_are_copies(x, y)) {
  477. return do_constant_folding_cond_eq(c);
  478. } else if (arg_is_const(y) && arg_info(y)->val == 0) {
  479. switch (c) {
  480. case TCG_COND_LTU:
  481. return 0;
  482. case TCG_COND_GEU:
  483. return 1;
  484. default:
  485. return -1;
  486. }
  487. }
  488. return -1;
  489. }
  490. /*
  491. * Return -1 if the condition can't be simplified,
  492. * and the result of the condition (0 or 1) if it can.
  493. */
  494. static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
  495. {
  496. TCGArg al = p1[0], ah = p1[1];
  497. TCGArg bl = p2[0], bh = p2[1];
  498. if (arg_is_const(bl) && arg_is_const(bh)) {
  499. tcg_target_ulong blv = arg_info(bl)->val;
  500. tcg_target_ulong bhv = arg_info(bh)->val;
  501. uint64_t b = deposit64(blv, 32, 32, bhv);
  502. if (arg_is_const(al) && arg_is_const(ah)) {
  503. tcg_target_ulong alv = arg_info(al)->val;
  504. tcg_target_ulong ahv = arg_info(ah)->val;
  505. uint64_t a = deposit64(alv, 32, 32, ahv);
  506. return do_constant_folding_cond_64(a, b, c);
  507. }
  508. if (b == 0) {
  509. switch (c) {
  510. case TCG_COND_LTU:
  511. return 0;
  512. case TCG_COND_GEU:
  513. return 1;
  514. default:
  515. break;
  516. }
  517. }
  518. }
  519. if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
  520. return do_constant_folding_cond_eq(c);
  521. }
  522. return -1;
  523. }
  524. /**
  525. * swap_commutative:
  526. * @dest: TCGArg of the destination argument, or NO_DEST.
  527. * @p1: first paired argument
  528. * @p2: second paired argument
  529. *
  530. * If *@p1 is a constant and *@p2 is not, swap.
  531. * If *@p2 matches @dest, swap.
  532. * Return true if a swap was performed.
  533. */
  534. #define NO_DEST temp_arg(NULL)
  535. static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
  536. {
  537. TCGArg a1 = *p1, a2 = *p2;
  538. int sum = 0;
  539. sum += arg_is_const(a1);
  540. sum -= arg_is_const(a2);
  541. /* Prefer the constant in the second argument, and then the form
  542. op a, a, b, which is better handled on non-RISC hosts. */
  543. if (sum > 0 || (sum == 0 && dest == a2)) {
  544. *p1 = a2;
  545. *p2 = a1;
  546. return true;
  547. }
  548. return false;
  549. }
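/*
 * Illustrative sketch, not part of optimize.c: just the decision rule from
 * swap_commutative, with plain booleans standing in for "argument is a
 * constant" and "second argument equals the destination".
 */
#include <assert.h>
#include <stdbool.h>

static bool demo_should_swap(bool a1_const, bool a2_const, bool a2_is_dest)
{
    int sum = (int)a1_const - (int)a2_const;
    /* Prefer the constant as the second operand, then prefer op d, d, b. */
    return sum > 0 || (sum == 0 && a2_is_dest);
}

int main(void)
{
    assert(demo_should_swap(true, false, false));    /* const, var -> swap */
    assert(!demo_should_swap(false, true, false));   /* var, const -> keep */
    assert(demo_should_swap(false, false, true));    /* a2 == dest -> swap */
    return 0;
}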
  550. static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
  551. {
  552. int sum = 0;
  553. sum += arg_is_const(p1[0]);
  554. sum += arg_is_const(p1[1]);
  555. sum -= arg_is_const(p2[0]);
  556. sum -= arg_is_const(p2[1]);
  557. if (sum > 0) {
  558. TCGArg t;
  559. t = p1[0], p1[0] = p2[0], p2[0] = t;
  560. t = p1[1], p1[1] = p2[1], p2[1] = t;
  561. return true;
  562. }
  563. return false;
  564. }
  565. static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
  566. {
  567. for (int i = 0; i < nb_args; i++) {
  568. TCGTemp *ts = arg_temp(op->args[i]);
  569. init_ts_info(ctx, ts);
  570. }
  571. }
  572. static void copy_propagate(OptContext *ctx, TCGOp *op,
  573. int nb_oargs, int nb_iargs)
  574. {
  575. TCGContext *s = ctx->tcg;
  576. for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
  577. TCGTemp *ts = arg_temp(op->args[i]);
  578. if (ts_is_copy(ts)) {
  579. op->args[i] = temp_arg(find_better_copy(s, ts));
  580. }
  581. }
  582. }
  583. static void finish_folding(OptContext *ctx, TCGOp *op)
  584. {
  585. const TCGOpDef *def = &tcg_op_defs[op->opc];
  586. int i, nb_oargs;
  587. /*
  588. * For an opcode that ends a BB, reset all temp data.
  589. * We do no cross-BB optimization.
  590. */
  591. if (def->flags & TCG_OPF_BB_END) {
  592. memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
  593. ctx->prev_mb = NULL;
  594. return;
  595. }
  596. nb_oargs = def->nb_oargs;
  597. for (i = 0; i < nb_oargs; i++) {
  598. TCGTemp *ts = arg_temp(op->args[i]);
  599. reset_ts(ts);
  600. /*
  601. * Save the corresponding known-zero/sign bits mask for the
  602. * first output argument (only one supported so far).
  603. */
  604. if (i == 0) {
  605. ts_info(ts)->z_mask = ctx->z_mask;
  606. ts_info(ts)->s_mask = ctx->s_mask;
  607. }
  608. }
  609. }
  610. /*
  611. * The fold_* functions return true when processing is complete,
  612. * usually by folding the operation to a constant or to a copy,
  613. * and calling tcg_opt_gen_{mov,movi}. They may do other things,
  614. * like collect information about the value produced, for use in
  615. * optimizing a subsequent operation.
  616. *
  617. * These first fold_* functions are all helpers, used by other
  618. * folders for more specific operations.
  619. */
  620. static bool fold_const1(OptContext *ctx, TCGOp *op)
  621. {
  622. if (arg_is_const(op->args[1])) {
  623. uint64_t t;
  624. t = arg_info(op->args[1])->val;
  625. t = do_constant_folding(op->opc, ctx->type, t, 0);
  626. return tcg_opt_gen_movi(ctx, op, op->args[0], t);
  627. }
  628. return false;
  629. }
  630. static bool fold_const2(OptContext *ctx, TCGOp *op)
  631. {
  632. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  633. uint64_t t1 = arg_info(op->args[1])->val;
  634. uint64_t t2 = arg_info(op->args[2])->val;
  635. t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
  636. return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
  637. }
  638. return false;
  639. }
  640. static bool fold_commutative(OptContext *ctx, TCGOp *op)
  641. {
  642. swap_commutative(op->args[0], &op->args[1], &op->args[2]);
  643. return false;
  644. }
  645. static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
  646. {
  647. swap_commutative(op->args[0], &op->args[1], &op->args[2]);
  648. return fold_const2(ctx, op);
  649. }
  650. static bool fold_masks(OptContext *ctx, TCGOp *op)
  651. {
  652. uint64_t a_mask = ctx->a_mask;
  653. uint64_t z_mask = ctx->z_mask;
  654. uint64_t s_mask = ctx->s_mask;
  655. /*
  656. * 32-bit ops generate 32-bit results, which for the purpose of
  657. * simplifying tcg are sign-extended. Certainly that's how we
  658. * represent our constants elsewhere. Note that the bits will
  659. * be reset properly for a 64-bit value when encountering the
  660. * type changing opcodes.
  661. */
  662. if (ctx->type == TCG_TYPE_I32) {
  663. a_mask = (int32_t)a_mask;
  664. z_mask = (int32_t)z_mask;
  665. s_mask |= MAKE_64BIT_MASK(32, 32);
  666. ctx->z_mask = z_mask;
  667. ctx->s_mask = s_mask;
  668. }
  669. if (z_mask == 0) {
  670. return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
  671. }
  672. if (a_mask == 0) {
  673. return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
  674. }
  675. return false;
  676. }
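/*
 * Illustrative sketch, not part of optimize.c: the two tests above, phrased
 * for an AND with a constant. If x already fits inside the constant's mask,
 * a_mask is 0 and the AND becomes a plain copy of x; if the masks do not
 * overlap at all, z_mask is 0 and the result is the constant 0.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t z1 = 0xff;                      /* x is known to fit in 8 bits     */
    assert((z1 & ~(uint64_t)0xff) == 0);     /* and x, 0xff: a_mask == 0 -> mov  */
    assert((z1 & (uint64_t)0xff00) == 0);    /* and x, 0xff00: z_mask == 0 -> 0  */
    return 0;
}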
  677. /*
  678. * Convert @op to NOT, if NOT is supported by the host.
  679. * Return true if the conversion is successful, which will still
  680. * indicate that the processing is complete.
  681. */
  682. static bool fold_not(OptContext *ctx, TCGOp *op);
  683. static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
  684. {
  685. TCGOpcode not_op;
  686. bool have_not;
  687. switch (ctx->type) {
  688. case TCG_TYPE_I32:
  689. not_op = INDEX_op_not_i32;
  690. have_not = TCG_TARGET_HAS_not_i32;
  691. break;
  692. case TCG_TYPE_I64:
  693. not_op = INDEX_op_not_i64;
  694. have_not = TCG_TARGET_HAS_not_i64;
  695. break;
  696. case TCG_TYPE_V64:
  697. case TCG_TYPE_V128:
  698. case TCG_TYPE_V256:
  699. not_op = INDEX_op_not_vec;
  700. have_not = TCG_TARGET_HAS_not_vec;
  701. break;
  702. default:
  703. g_assert_not_reached();
  704. }
  705. if (have_not) {
  706. op->opc = not_op;
  707. op->args[1] = op->args[idx];
  708. return fold_not(ctx, op);
  709. }
  710. return false;
  711. }
  712. /* If the binary operation has first argument @i, fold to @i. */
  713. static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
  714. {
  715. if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
  716. return tcg_opt_gen_movi(ctx, op, op->args[0], i);
  717. }
  718. return false;
  719. }
  720. /* If the binary operation has first argument @i, fold to NOT. */
  721. static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
  722. {
  723. if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
  724. return fold_to_not(ctx, op, 2);
  725. }
  726. return false;
  727. }
  728. /* If the binary operation has second argument @i, fold to @i. */
  729. static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
  730. {
  731. if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
  732. return tcg_opt_gen_movi(ctx, op, op->args[0], i);
  733. }
  734. return false;
  735. }
  736. /* If the binary operation has second argument @i, fold to identity. */
  737. static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
  738. {
  739. if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
  740. return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
  741. }
  742. return false;
  743. }
  744. /* If the binary operation has second argument @i, fold to NOT. */
  745. static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
  746. {
  747. if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
  748. return fold_to_not(ctx, op, 1);
  749. }
  750. return false;
  751. }
  752. /* If the binary operation has both arguments equal, fold to @i. */
  753. static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
  754. {
  755. if (args_are_copies(op->args[1], op->args[2])) {
  756. return tcg_opt_gen_movi(ctx, op, op->args[0], i);
  757. }
  758. return false;
  759. }
  760. /* If the binary operation has both arguments equal, fold to identity. */
  761. static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
  762. {
  763. if (args_are_copies(op->args[1], op->args[2])) {
  764. return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
  765. }
  766. return false;
  767. }
  768. /*
  769. * These outermost fold_<op> functions are sorted alphabetically.
  770. *
  771. * The ordering of the transformations should be:
  772. * 1) those that produce a constant
  773. * 2) those that produce a copy
  774. * 3) those that produce information about the result value.
  775. */
  776. static bool fold_add(OptContext *ctx, TCGOp *op)
  777. {
  778. if (fold_const2_commutative(ctx, op) ||
  779. fold_xi_to_x(ctx, op, 0)) {
  780. return true;
  781. }
  782. return false;
  783. }
  784. /* We cannot as yet do_constant_folding with vectors. */
  785. static bool fold_add_vec(OptContext *ctx, TCGOp *op)
  786. {
  787. if (fold_commutative(ctx, op) ||
  788. fold_xi_to_x(ctx, op, 0)) {
  789. return true;
  790. }
  791. return false;
  792. }
  793. static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
  794. {
  795. if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
  796. arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
  797. uint64_t al = arg_info(op->args[2])->val;
  798. uint64_t ah = arg_info(op->args[3])->val;
  799. uint64_t bl = arg_info(op->args[4])->val;
  800. uint64_t bh = arg_info(op->args[5])->val;
  801. TCGArg rl, rh;
  802. TCGOp *op2;
  803. if (ctx->type == TCG_TYPE_I32) {
  804. uint64_t a = deposit64(al, 32, 32, ah);
  805. uint64_t b = deposit64(bl, 32, 32, bh);
  806. if (add) {
  807. a += b;
  808. } else {
  809. a -= b;
  810. }
  811. al = sextract64(a, 0, 32);
  812. ah = sextract64(a, 32, 32);
  813. } else {
  814. Int128 a = int128_make128(al, ah);
  815. Int128 b = int128_make128(bl, bh);
  816. if (add) {
  817. a = int128_add(a, b);
  818. } else {
  819. a = int128_sub(a, b);
  820. }
  821. al = int128_getlo(a);
  822. ah = int128_gethi(a);
  823. }
  824. rl = op->args[0];
  825. rh = op->args[1];
  826. /* The proper opcode is supplied by tcg_opt_gen_mov. */
  827. op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
  828. tcg_opt_gen_movi(ctx, op, rl, al);
  829. tcg_opt_gen_movi(ctx, op2, rh, ah);
  830. return true;
  831. }
  832. return false;
  833. }
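/*
 * Illustrative sketch, not part of optimize.c: the TCG_TYPE_I32 path above
 * glues the two 32-bit halves into one 64-bit value, adds, and splits the
 * result back into sign-extended halves; deposit64/sextract64 are re-created
 * here with plain shifts and casts.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t al = 0xffffffff, ah = 0, bl = 1, bh = 0;
    uint64_t a = (ah << 32) | al;           /* deposit64(al, 32, 32, ah)     */
    uint64_t b = (bh << 32) | bl;
    a += b;                                 /* 0xffffffff + 1 = 0x100000000  */
    int64_t lo = (int32_t)(uint32_t)a;      /* sextract64(a, 0, 32)          */
    int64_t hi = (int64_t)a >> 32;          /* sextract64(a, 32, 32)         */
    assert(lo == 0 && hi == 1);             /* the carry reaches the high word */
    return 0;
}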
  834. static bool fold_add2(OptContext *ctx, TCGOp *op)
  835. {
  836. /* Note that the high and low parts may be independently swapped. */
  837. swap_commutative(op->args[0], &op->args[2], &op->args[4]);
  838. swap_commutative(op->args[1], &op->args[3], &op->args[5]);
  839. return fold_addsub2(ctx, op, true);
  840. }
  841. static bool fold_and(OptContext *ctx, TCGOp *op)
  842. {
  843. uint64_t z1, z2;
  844. if (fold_const2_commutative(ctx, op) ||
  845. fold_xi_to_i(ctx, op, 0) ||
  846. fold_xi_to_x(ctx, op, -1) ||
  847. fold_xx_to_x(ctx, op)) {
  848. return true;
  849. }
  850. z1 = arg_info(op->args[1])->z_mask;
  851. z2 = arg_info(op->args[2])->z_mask;
  852. ctx->z_mask = z1 & z2;
  853. /*
  854. * Sign repetitions are perforce all identical, whether they are 1 or 0.
  855. * Bitwise operations preserve the relative quantity of the repetitions.
  856. */
  857. ctx->s_mask = arg_info(op->args[1])->s_mask
  858. & arg_info(op->args[2])->s_mask;
  859. /*
  860. * Known-zeros does not imply known-ones. Therefore unless
  861. * arg2 is constant, we can't infer affected bits from it.
  862. */
  863. if (arg_is_const(op->args[2])) {
  864. ctx->a_mask = z1 & ~z2;
  865. }
  866. return fold_masks(ctx, op);
  867. }
  868. static bool fold_andc(OptContext *ctx, TCGOp *op)
  869. {
  870. uint64_t z1;
  871. if (fold_const2(ctx, op) ||
  872. fold_xx_to_i(ctx, op, 0) ||
  873. fold_xi_to_x(ctx, op, 0) ||
  874. fold_ix_to_not(ctx, op, -1)) {
  875. return true;
  876. }
  877. z1 = arg_info(op->args[1])->z_mask;
  878. /*
  879. * Known-zeros does not imply known-ones. Therefore unless
  880. * arg2 is constant, we can't infer anything from it.
  881. */
  882. if (arg_is_const(op->args[2])) {
  883. uint64_t z2 = ~arg_info(op->args[2])->z_mask;
  884. ctx->a_mask = z1 & ~z2;
  885. z1 &= z2;
  886. }
  887. ctx->z_mask = z1;
  888. ctx->s_mask = arg_info(op->args[1])->s_mask
  889. & arg_info(op->args[2])->s_mask;
  890. return fold_masks(ctx, op);
  891. }
  892. static bool fold_brcond(OptContext *ctx, TCGOp *op)
  893. {
  894. TCGCond cond = op->args[2];
  895. int i;
  896. if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
  897. op->args[2] = cond = tcg_swap_cond(cond);
  898. }
  899. i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
  900. if (i == 0) {
  901. tcg_op_remove(ctx->tcg, op);
  902. return true;
  903. }
  904. if (i > 0) {
  905. op->opc = INDEX_op_br;
  906. op->args[0] = op->args[3];
  907. }
  908. return false;
  909. }
  910. static bool fold_brcond2(OptContext *ctx, TCGOp *op)
  911. {
  912. TCGCond cond = op->args[4];
  913. TCGArg label = op->args[5];
  914. int i, inv = 0;
  915. if (swap_commutative2(&op->args[0], &op->args[2])) {
  916. op->args[4] = cond = tcg_swap_cond(cond);
  917. }
  918. i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
  919. if (i >= 0) {
  920. goto do_brcond_const;
  921. }
  922. switch (cond) {
  923. case TCG_COND_LT:
  924. case TCG_COND_GE:
  925. /*
  926. * Simplify LT/GE comparisons vs zero to a single compare
  927. * vs the high word of the input.
  928. */
  929. if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
  930. arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
  931. goto do_brcond_high;
  932. }
  933. break;
  934. case TCG_COND_NE:
  935. inv = 1;
  936. QEMU_FALLTHROUGH;
  937. case TCG_COND_EQ:
  938. /*
  939. * Simplify EQ/NE comparisons where one of the pairs
  940. * can be simplified.
  941. */
  942. i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
  943. op->args[2], cond);
  944. switch (i ^ inv) {
  945. case 0:
  946. goto do_brcond_const;
  947. case 1:
  948. goto do_brcond_high;
  949. }
  950. i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
  951. op->args[3], cond);
  952. switch (i ^ inv) {
  953. case 0:
  954. goto do_brcond_const;
  955. case 1:
  956. op->opc = INDEX_op_brcond_i32;
  957. op->args[1] = op->args[2];
  958. op->args[2] = cond;
  959. op->args[3] = label;
  960. break;
  961. }
  962. break;
  963. default:
  964. break;
  965. do_brcond_high:
  966. op->opc = INDEX_op_brcond_i32;
  967. op->args[0] = op->args[1];
  968. op->args[1] = op->args[3];
  969. op->args[2] = cond;
  970. op->args[3] = label;
  971. break;
  972. do_brcond_const:
  973. if (i == 0) {
  974. tcg_op_remove(ctx->tcg, op);
  975. return true;
  976. }
  977. op->opc = INDEX_op_br;
  978. op->args[0] = label;
  979. break;
  980. }
  981. return false;
  982. }
  983. static bool fold_bswap(OptContext *ctx, TCGOp *op)
  984. {
  985. uint64_t z_mask, s_mask, sign;
  986. if (arg_is_const(op->args[1])) {
  987. uint64_t t = arg_info(op->args[1])->val;
  988. t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
  989. return tcg_opt_gen_movi(ctx, op, op->args[0], t);
  990. }
  991. z_mask = arg_info(op->args[1])->z_mask;
  992. switch (op->opc) {
  993. case INDEX_op_bswap16_i32:
  994. case INDEX_op_bswap16_i64:
  995. z_mask = bswap16(z_mask);
  996. sign = INT16_MIN;
  997. break;
  998. case INDEX_op_bswap32_i32:
  999. case INDEX_op_bswap32_i64:
  1000. z_mask = bswap32(z_mask);
  1001. sign = INT32_MIN;
  1002. break;
  1003. case INDEX_op_bswap64_i64:
  1004. z_mask = bswap64(z_mask);
  1005. sign = INT64_MIN;
  1006. break;
  1007. default:
  1008. g_assert_not_reached();
  1009. }
  1010. s_mask = smask_from_zmask(z_mask);
  1011. switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
  1012. case TCG_BSWAP_OZ:
  1013. break;
  1014. case TCG_BSWAP_OS:
  1015. /* If the sign bit may be 1, force all the bits above to 1. */
  1016. if (z_mask & sign) {
  1017. z_mask |= sign;
  1018. s_mask = sign << 1;
  1019. }
  1020. break;
  1021. default:
  1022. /* The high bits are undefined: force all bits above the sign to 1. */
  1023. z_mask |= sign << 1;
  1024. s_mask = 0;
  1025. break;
  1026. }
  1027. ctx->z_mask = z_mask;
  1028. ctx->s_mask = s_mask;
  1029. return fold_masks(ctx, op);
  1030. }
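/*
 * Illustrative sketch, not part of optimize.c: byte-swapping the known-zero
 * mask. If only the low byte of the input can be nonzero, then after a
 * 16-bit byte swap only the high byte of that halfword can be nonzero.
 * demo_bswap16() is a hand-rolled stand-in for bswap16().
 */
#include <assert.h>
#include <stdint.h>

static uint16_t demo_bswap16(uint16_t x)
{
    return (uint16_t)((x << 8) | (x >> 8));
}

int main(void)
{
    uint64_t z_mask = 0x00ff;
    z_mask = demo_bswap16((uint16_t)z_mask);
    assert(z_mask == 0xff00);
    return 0;
}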
  1031. static bool fold_call(OptContext *ctx, TCGOp *op)
  1032. {
  1033. TCGContext *s = ctx->tcg;
  1034. int nb_oargs = TCGOP_CALLO(op);
  1035. int nb_iargs = TCGOP_CALLI(op);
  1036. int flags, i;
  1037. init_arguments(ctx, op, nb_oargs + nb_iargs);
  1038. copy_propagate(ctx, op, nb_oargs, nb_iargs);
  1039. /* If the function reads or writes globals, reset temp data. */
  1040. flags = tcg_call_flags(op);
  1041. if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
  1042. int nb_globals = s->nb_globals;
  1043. for (i = 0; i < nb_globals; i++) {
  1044. if (test_bit(i, ctx->temps_used.l)) {
  1045. reset_ts(&ctx->tcg->temps[i]);
  1046. }
  1047. }
  1048. }
  1049. /* Reset temp data for outputs. */
  1050. for (i = 0; i < nb_oargs; i++) {
  1051. reset_temp(op->args[i]);
  1052. }
  1053. /* Stop optimizing MB across calls. */
  1054. ctx->prev_mb = NULL;
  1055. return true;
  1056. }
  1057. static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
  1058. {
  1059. uint64_t z_mask;
  1060. if (arg_is_const(op->args[1])) {
  1061. uint64_t t = arg_info(op->args[1])->val;
  1062. if (t != 0) {
  1063. t = do_constant_folding(op->opc, ctx->type, t, 0);
  1064. return tcg_opt_gen_movi(ctx, op, op->args[0], t);
  1065. }
  1066. return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
  1067. }
  1068. switch (ctx->type) {
  1069. case TCG_TYPE_I32:
  1070. z_mask = 31;
  1071. break;
  1072. case TCG_TYPE_I64:
  1073. z_mask = 63;
  1074. break;
  1075. default:
  1076. g_assert_not_reached();
  1077. }
  1078. ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
  1079. ctx->s_mask = smask_from_zmask(ctx->z_mask);
  1080. return false;
  1081. }
  1082. static bool fold_ctpop(OptContext *ctx, TCGOp *op)
  1083. {
  1084. if (fold_const1(ctx, op)) {
  1085. return true;
  1086. }
  1087. switch (ctx->type) {
  1088. case TCG_TYPE_I32:
  1089. ctx->z_mask = 32 | 31;
  1090. break;
  1091. case TCG_TYPE_I64:
  1092. ctx->z_mask = 64 | 63;
  1093. break;
  1094. default:
  1095. g_assert_not_reached();
  1096. }
  1097. ctx->s_mask = smask_from_zmask(ctx->z_mask);
  1098. return false;
  1099. }
  1100. static bool fold_deposit(OptContext *ctx, TCGOp *op)
  1101. {
  1102. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  1103. uint64_t t1 = arg_info(op->args[1])->val;
  1104. uint64_t t2 = arg_info(op->args[2])->val;
  1105. t1 = deposit64(t1, op->args[3], op->args[4], t2);
  1106. return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
  1107. }
  1108. ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
  1109. op->args[3], op->args[4],
  1110. arg_info(op->args[2])->z_mask);
  1111. return false;
  1112. }
  1113. static bool fold_divide(OptContext *ctx, TCGOp *op)
  1114. {
  1115. if (fold_const2(ctx, op) ||
  1116. fold_xi_to_x(ctx, op, 1)) {
  1117. return true;
  1118. }
  1119. return false;
  1120. }
  1121. static bool fold_dup(OptContext *ctx, TCGOp *op)
  1122. {
  1123. if (arg_is_const(op->args[1])) {
  1124. uint64_t t = arg_info(op->args[1])->val;
  1125. t = dup_const(TCGOP_VECE(op), t);
  1126. return tcg_opt_gen_movi(ctx, op, op->args[0], t);
  1127. }
  1128. return false;
  1129. }
  1130. static bool fold_dup2(OptContext *ctx, TCGOp *op)
  1131. {
  1132. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  1133. uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
  1134. arg_info(op->args[2])->val);
  1135. return tcg_opt_gen_movi(ctx, op, op->args[0], t);
  1136. }
  1137. if (args_are_copies(op->args[1], op->args[2])) {
  1138. op->opc = INDEX_op_dup_vec;
  1139. TCGOP_VECE(op) = MO_32;
  1140. }
  1141. return false;
  1142. }
  1143. static bool fold_eqv(OptContext *ctx, TCGOp *op)
  1144. {
  1145. if (fold_const2_commutative(ctx, op) ||
  1146. fold_xi_to_x(ctx, op, -1) ||
  1147. fold_xi_to_not(ctx, op, 0)) {
  1148. return true;
  1149. }
  1150. ctx->s_mask = arg_info(op->args[1])->s_mask
  1151. & arg_info(op->args[2])->s_mask;
  1152. return false;
  1153. }
  1154. static bool fold_extract(OptContext *ctx, TCGOp *op)
  1155. {
  1156. uint64_t z_mask_old, z_mask;
  1157. int pos = op->args[2];
  1158. int len = op->args[3];
  1159. if (arg_is_const(op->args[1])) {
  1160. uint64_t t;
  1161. t = arg_info(op->args[1])->val;
  1162. t = extract64(t, pos, len);
  1163. return tcg_opt_gen_movi(ctx, op, op->args[0], t);
  1164. }
  1165. z_mask_old = arg_info(op->args[1])->z_mask;
  1166. z_mask = extract64(z_mask_old, pos, len);
  1167. if (pos == 0) {
  1168. ctx->a_mask = z_mask_old ^ z_mask;
  1169. }
  1170. ctx->z_mask = z_mask;
  1171. ctx->s_mask = smask_from_zmask(z_mask);
  1172. return fold_masks(ctx, op);
  1173. }
  1174. static bool fold_extract2(OptContext *ctx, TCGOp *op)
  1175. {
  1176. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  1177. uint64_t v1 = arg_info(op->args[1])->val;
  1178. uint64_t v2 = arg_info(op->args[2])->val;
  1179. int shr = op->args[3];
  1180. if (op->opc == INDEX_op_extract2_i64) {
  1181. v1 >>= shr;
  1182. v2 <<= 64 - shr;
  1183. } else {
  1184. v1 = (uint32_t)v1 >> shr;
  1185. v2 = (uint64_t)((int32_t)v2 << (32 - shr));
  1186. }
  1187. return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
  1188. }
  1189. return false;
  1190. }
  1191. static bool fold_exts(OptContext *ctx, TCGOp *op)
  1192. {
  1193. uint64_t s_mask_old, s_mask, z_mask, sign;
  1194. bool type_change = false;
  1195. if (fold_const1(ctx, op)) {
  1196. return true;
  1197. }
  1198. z_mask = arg_info(op->args[1])->z_mask;
  1199. s_mask = arg_info(op->args[1])->s_mask;
  1200. s_mask_old = s_mask;
  1201. switch (op->opc) {
  1202. CASE_OP_32_64(ext8s):
  1203. sign = INT8_MIN;
  1204. z_mask = (uint8_t)z_mask;
  1205. break;
  1206. CASE_OP_32_64(ext16s):
  1207. sign = INT16_MIN;
  1208. z_mask = (uint16_t)z_mask;
  1209. break;
  1210. case INDEX_op_ext_i32_i64:
  1211. type_change = true;
  1212. QEMU_FALLTHROUGH;
  1213. case INDEX_op_ext32s_i64:
  1214. sign = INT32_MIN;
  1215. z_mask = (uint32_t)z_mask;
  1216. break;
  1217. default:
  1218. g_assert_not_reached();
  1219. }
  1220. if (z_mask & sign) {
  1221. z_mask |= sign;
  1222. }
  1223. s_mask |= sign << 1;
  1224. ctx->z_mask = z_mask;
  1225. ctx->s_mask = s_mask;
  1226. if (!type_change) {
  1227. ctx->a_mask = s_mask & ~s_mask_old;
  1228. }
  1229. return fold_masks(ctx, op);
  1230. }
  1231. static bool fold_extu(OptContext *ctx, TCGOp *op)
  1232. {
  1233. uint64_t z_mask_old, z_mask;
  1234. bool type_change = false;
  1235. if (fold_const1(ctx, op)) {
  1236. return true;
  1237. }
  1238. z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
  1239. switch (op->opc) {
  1240. CASE_OP_32_64(ext8u):
  1241. z_mask = (uint8_t)z_mask;
  1242. break;
  1243. CASE_OP_32_64(ext16u):
  1244. z_mask = (uint16_t)z_mask;
  1245. break;
  1246. case INDEX_op_extrl_i64_i32:
  1247. case INDEX_op_extu_i32_i64:
  1248. type_change = true;
  1249. QEMU_FALLTHROUGH;
  1250. case INDEX_op_ext32u_i64:
  1251. z_mask = (uint32_t)z_mask;
  1252. break;
  1253. case INDEX_op_extrh_i64_i32:
  1254. type_change = true;
  1255. z_mask >>= 32;
  1256. break;
  1257. default:
  1258. g_assert_not_reached();
  1259. }
  1260. ctx->z_mask = z_mask;
  1261. ctx->s_mask = smask_from_zmask(z_mask);
  1262. if (!type_change) {
  1263. ctx->a_mask = z_mask_old ^ z_mask;
  1264. }
  1265. return fold_masks(ctx, op);
  1266. }
  1267. static bool fold_mb(OptContext *ctx, TCGOp *op)
  1268. {
  1269. /* Eliminate duplicate and redundant fence instructions. */
  1270. if (ctx->prev_mb) {
  1271. /*
  1272. * Merge two barriers of the same type into one,
  1273. * or a weaker barrier into a stronger one,
  1274. * or two weaker barriers into a stronger one.
  1275. * mb X; mb Y => mb X|Y
  1276. * mb; strl => mb; st
  1277. * ldaq; mb => ld; mb
  1278. * ldaq; strl => ld; mb; st
  1279. * Other combinations are also merged into a strong
  1280. * barrier. This is stricter than specified but for
  1281. * the purposes of TCG is better than not optimizing.
  1282. */
  1283. ctx->prev_mb->args[0] |= op->args[0];
  1284. tcg_op_remove(ctx->tcg, op);
  1285. } else {
  1286. ctx->prev_mb = op;
  1287. }
  1288. return true;
  1289. }
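/*
 * Illustrative sketch, not part of optimize.c: merging adjacent barriers is
 * an OR of their argument bitmasks. The DEMO_MO_* values are local stand-ins
 * for the TCG_MO_* flags, not taken from tcg.h.
 */
#include <assert.h>

enum { DEMO_MO_LD_LD = 1, DEMO_MO_ST_ST = 2, DEMO_MO_LD_ST = 4, DEMO_MO_ST_LD = 8 };

int main(void)
{
    int prev_mb_arg = DEMO_MO_LD_LD;                 /* earlier, weaker fence   */
    int this_mb_arg = DEMO_MO_ST_ST | DEMO_MO_LD_ST; /* second fence            */
    prev_mb_arg |= this_mb_arg;                      /* mb X; mb Y  =>  mb X|Y  */
    assert(prev_mb_arg == (DEMO_MO_LD_LD | DEMO_MO_ST_ST | DEMO_MO_LD_ST));
    return 0;
}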
  1290. static bool fold_mov(OptContext *ctx, TCGOp *op)
  1291. {
  1292. return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
  1293. }
  1294. static bool fold_movcond(OptContext *ctx, TCGOp *op)
  1295. {
  1296. TCGCond cond = op->args[5];
  1297. int i;
  1298. if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
  1299. op->args[5] = cond = tcg_swap_cond(cond);
  1300. }
  1301. /*
  1302. * Canonicalize the "false" input reg to match the destination reg so
  1303. * that the tcg backend can implement a "move if true" operation.
  1304. */
  1305. if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
  1306. op->args[5] = cond = tcg_invert_cond(cond);
  1307. }
  1308. i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
  1309. if (i >= 0) {
  1310. return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
  1311. }
  1312. ctx->z_mask = arg_info(op->args[3])->z_mask
  1313. | arg_info(op->args[4])->z_mask;
  1314. ctx->s_mask = arg_info(op->args[3])->s_mask
  1315. & arg_info(op->args[4])->s_mask;
  1316. if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
  1317. uint64_t tv = arg_info(op->args[3])->val;
  1318. uint64_t fv = arg_info(op->args[4])->val;
  1319. TCGOpcode opc;
  1320. switch (ctx->type) {
  1321. case TCG_TYPE_I32:
  1322. opc = INDEX_op_setcond_i32;
  1323. break;
  1324. case TCG_TYPE_I64:
  1325. opc = INDEX_op_setcond_i64;
  1326. break;
  1327. default:
  1328. g_assert_not_reached();
  1329. }
  1330. if (tv == 1 && fv == 0) {
  1331. op->opc = opc;
  1332. op->args[3] = cond;
  1333. } else if (fv == 1 && tv == 0) {
  1334. op->opc = opc;
  1335. op->args[3] = tcg_invert_cond(cond);
  1336. }
  1337. }
  1338. return false;
  1339. }
  1340. static bool fold_mul(OptContext *ctx, TCGOp *op)
  1341. {
  1342. if (fold_const2(ctx, op) ||
  1343. fold_xi_to_i(ctx, op, 0) ||
  1344. fold_xi_to_x(ctx, op, 1)) {
  1345. return true;
  1346. }
  1347. return false;
  1348. }
  1349. static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
  1350. {
  1351. if (fold_const2_commutative(ctx, op) ||
  1352. fold_xi_to_i(ctx, op, 0)) {
  1353. return true;
  1354. }
  1355. return false;
  1356. }
  1357. static bool fold_multiply2(OptContext *ctx, TCGOp *op)
  1358. {
  1359. swap_commutative(op->args[0], &op->args[2], &op->args[3]);
  1360. if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
  1361. uint64_t a = arg_info(op->args[2])->val;
  1362. uint64_t b = arg_info(op->args[3])->val;
  1363. uint64_t h, l;
  1364. TCGArg rl, rh;
  1365. TCGOp *op2;
  1366. switch (op->opc) {
  1367. case INDEX_op_mulu2_i32:
  1368. l = (uint64_t)(uint32_t)a * (uint32_t)b;
  1369. h = (int32_t)(l >> 32);
  1370. l = (int32_t)l;
  1371. break;
  1372. case INDEX_op_muls2_i32:
  1373. l = (int64_t)(int32_t)a * (int32_t)b;
  1374. h = l >> 32;
  1375. l = (int32_t)l;
  1376. break;
  1377. case INDEX_op_mulu2_i64:
  1378. mulu64(&l, &h, a, b);
  1379. break;
  1380. case INDEX_op_muls2_i64:
  1381. muls64(&l, &h, a, b);
  1382. break;
  1383. default:
  1384. g_assert_not_reached();
  1385. }
  1386. rl = op->args[0];
  1387. rh = op->args[1];
  1388. /* The proper opcode is supplied by tcg_opt_gen_mov. */
  1389. op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
  1390. tcg_opt_gen_movi(ctx, op, rl, l);
  1391. tcg_opt_gen_movi(ctx, op2, rh, h);
  1392. return true;
  1393. }
  1394. return false;
  1395. }
  1396. static bool fold_nand(OptContext *ctx, TCGOp *op)
  1397. {
  1398. if (fold_const2_commutative(ctx, op) ||
  1399. fold_xi_to_not(ctx, op, -1)) {
  1400. return true;
  1401. }
  1402. ctx->s_mask = arg_info(op->args[1])->s_mask
  1403. & arg_info(op->args[2])->s_mask;
  1404. return false;
  1405. }
  1406. static bool fold_neg(OptContext *ctx, TCGOp *op)
  1407. {
  1408. uint64_t z_mask;
  1409. if (fold_const1(ctx, op)) {
  1410. return true;
  1411. }
  1412. /* Set to 1 all bits from the rightmost possibly-set bit leftward. */
  1413. z_mask = arg_info(op->args[1])->z_mask;
  1414. ctx->z_mask = -(z_mask & -z_mask);
  1415. /*
  1416. * Because of fold_sub_to_neg, we want to always return true,
  1417. * via finish_folding.
  1418. */
  1419. finish_folding(ctx, op);
  1420. return true;
  1421. }
  1422. static bool fold_nor(OptContext *ctx, TCGOp *op)
  1423. {
  1424. if (fold_const2_commutative(ctx, op) ||
  1425. fold_xi_to_not(ctx, op, 0)) {
  1426. return true;
  1427. }
  1428. ctx->s_mask = arg_info(op->args[1])->s_mask
  1429. & arg_info(op->args[2])->s_mask;
  1430. return false;
  1431. }
  1432. static bool fold_not(OptContext *ctx, TCGOp *op)
  1433. {
  1434. if (fold_const1(ctx, op)) {
  1435. return true;
  1436. }
  1437. ctx->s_mask = arg_info(op->args[1])->s_mask;
  1438. /* Because of fold_to_not, we want to always return true, via finish. */
  1439. finish_folding(ctx, op);
  1440. return true;
  1441. }
  1442. static bool fold_or(OptContext *ctx, TCGOp *op)
  1443. {
  1444. if (fold_const2_commutative(ctx, op) ||
  1445. fold_xi_to_x(ctx, op, 0) ||
  1446. fold_xx_to_x(ctx, op)) {
  1447. return true;
  1448. }
  1449. ctx->z_mask = arg_info(op->args[1])->z_mask
  1450. | arg_info(op->args[2])->z_mask;
  1451. ctx->s_mask = arg_info(op->args[1])->s_mask
  1452. & arg_info(op->args[2])->s_mask;
  1453. return fold_masks(ctx, op);
  1454. }
  1455. static bool fold_orc(OptContext *ctx, TCGOp *op)
  1456. {
  1457. if (fold_const2(ctx, op) ||
  1458. fold_xx_to_i(ctx, op, -1) ||
  1459. fold_xi_to_x(ctx, op, -1) ||
  1460. fold_ix_to_not(ctx, op, 0)) {
  1461. return true;
  1462. }
  1463. ctx->s_mask = arg_info(op->args[1])->s_mask
  1464. & arg_info(op->args[2])->s_mask;
  1465. return false;
  1466. }
  1467. static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
  1468. {
  1469. const TCGOpDef *def = &tcg_op_defs[op->opc];
  1470. MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
  1471. MemOp mop = get_memop(oi);
  1472. int width = 8 * memop_size(mop);
  1473. if (width < 64) {
  1474. ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
  1475. if (!(mop & MO_SIGN)) {
  1476. ctx->z_mask = MAKE_64BIT_MASK(0, width);
  1477. ctx->s_mask <<= 1;
  1478. }
  1479. }
  1480. /* Opcodes that touch guest memory stop the mb optimization. */
  1481. ctx->prev_mb = NULL;
  1482. return false;
  1483. }
  1484. static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
  1485. {
  1486. /* Opcodes that touch guest memory stop the mb optimization. */
  1487. ctx->prev_mb = NULL;
  1488. return false;
  1489. }
  1490. static bool fold_remainder(OptContext *ctx, TCGOp *op)
  1491. {
  1492. if (fold_const2(ctx, op) ||
  1493. fold_xx_to_i(ctx, op, 0)) {
  1494. return true;
  1495. }
  1496. return false;
  1497. }
  1498. static bool fold_setcond(OptContext *ctx, TCGOp *op)
  1499. {
  1500. TCGCond cond = op->args[3];
  1501. int i;
  1502. if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
  1503. op->args[3] = cond = tcg_swap_cond(cond);
  1504. }
  1505. i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
  1506. if (i >= 0) {
  1507. return tcg_opt_gen_movi(ctx, op, op->args[0], i);
  1508. }
  1509. ctx->z_mask = 1;
  1510. ctx->s_mask = smask_from_zmask(1);
  1511. return false;
  1512. }
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond = op->args[5];
    int i, inv = 0;

    if (swap_commutative2(&op->args[1], &op->args[3])) {
        op->args[5] = cond = tcg_swap_cond(cond);
    }

    i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
    if (i >= 0) {
        goto do_setcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
            arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
            goto do_setcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;

    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            goto do_setcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
                                     op->args[4], cond);
        switch (i ^ inv) {
        case 0:
            goto do_setcond_const;
        case 1:
            op->args[2] = op->args[3];
            op->args[3] = cond;
            op->opc = INDEX_op_setcond_i32;
            break;
        }
        break;

    default:
        break;

    do_setcond_high:
        op->args[1] = op->args[2];
        op->args[2] = op->args[4];
        op->args[3] = cond;
        op->opc = INDEX_op_setcond_i32;
        break;
    }

    ctx->z_mask = 1;
    ctx->s_mask = smask_from_zmask(1);
    return false;

 do_setcond_const:
    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
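
/*
 * Constant-fold sextract; otherwise sign-extract the operand's masks to
 * describe the result.  For pos == 0, a_mask records the sign copies
 * newly created by the extension: none means the input was already
 * sign-extended at least this far.
 */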
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, s_mask_old;
    int pos = op->args[2];
    int len = op->args[3];

    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = sextract64(t, pos, len);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask = arg_info(op->args[1])->z_mask;
    z_mask = sextract64(z_mask, pos, len);
    ctx->z_mask = z_mask;

    s_mask_old = arg_info(op->args[1])->s_mask;
    s_mask = sextract64(s_mask_old, pos, len);
    s_mask |= MAKE_64BIT_MASK(len, 64 - len);
    ctx->s_mask = s_mask;

    if (pos == 0) {
        ctx->a_mask = s_mask & ~s_mask_old;
    }

    return fold_masks(ctx, op);
}
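
/*
 * Shifts and rotates.  With a constant shift count, the result masks
 * are obtained by applying the same operation to the operand masks.
 * Otherwise only the sign-repetition mask can be preserved: always for
 * sar, and for shr only when the sign bit is known zero.
 */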
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
    uint64_t s_mask, z_mask, sign;

    if (fold_const2(ctx, op) ||
        fold_ix_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }

    s_mask = arg_info(op->args[1])->s_mask;
    z_mask = arg_info(op->args[1])->z_mask;

    if (arg_is_const(op->args[2])) {
        int sh = arg_info(op->args[2])->val;

        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);

        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
        ctx->s_mask = smask_from_smask(s_mask);

        return fold_masks(ctx, op);
    }

    switch (op->opc) {
    CASE_OP_32_64(sar):
        /*
         * Arithmetic right shift will not reduce the number of
         * input sign repetitions.
         */
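        /*
         * e.g. sar_i32 0xffff8000, 4 -> 0xfffff800: the run of sign-bit
         * copies grows from 17 to 21 bits and can never shrink.
         */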
        ctx->s_mask = s_mask;
        break;

    CASE_OP_32_64(shr):
        /*
         * If the sign bit is known zero, then logical right shift
         * will not reduce the number of input sign repetitions.
         */
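        /*
         * (s_mask & -s_mask) >> 1 isolates the bit just below the known
         * run of sign copies; if z_mask shows that bit is zero, the
         * zeros shifted in from the left match the existing copies.
         */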
        sign = (s_mask & -s_mask) >> 1;
        if (!(z_mask & sign)) {
            ctx->s_mask = s_mask;
        }
        break;

    default:
        break;
    }

    return false;
}
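
/*
 * Rewrite sub(0, x) as neg(x) when the backend provides a negate
 * opcode for the current type.
 */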
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
{
    TCGOpcode neg_op;
    bool have_neg;

    if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
        return false;
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        neg_op = INDEX_op_neg_i32;
        have_neg = TCG_TARGET_HAS_neg_i32;
        break;
    case TCG_TYPE_I64:
        neg_op = INDEX_op_neg_i64;
        have_neg = TCG_TARGET_HAS_neg_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        neg_op = INDEX_op_neg_vec;
        have_neg = (TCG_TARGET_HAS_neg_vec &&
                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
        break;
    default:
        g_assert_not_reached();
    }

    if (have_neg) {
        op->opc = neg_op;
        op->args[1] = op->args[2];
        return fold_neg(ctx, op);
    }
    return false;
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_sub_to_neg(ctx, op)) {
        return true;
    }
    return false;
}

static bool fold_sub(OptContext *ctx, TCGOp *op)
{
    return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
}

static bool fold_sub2(OptContext *ctx, TCGOp *op)
{
    return fold_addsub2(ctx, op, false);
}

static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
    /* We can't do any folding with a load, but we can record bits. */
    switch (op->opc) {
    CASE_OP_32_64(ld8s):
        ctx->s_mask = MAKE_64BIT_MASK(8, 56);
        break;
    CASE_OP_32_64(ld8u):
        ctx->z_mask = MAKE_64BIT_MASK(0, 8);
        ctx->s_mask = MAKE_64BIT_MASK(9, 55);
        break;
    CASE_OP_32_64(ld16s):
        ctx->s_mask = MAKE_64BIT_MASK(16, 48);
        break;
    CASE_OP_32_64(ld16u):
        ctx->z_mask = MAKE_64BIT_MASK(0, 16);
        ctx->s_mask = MAKE_64BIT_MASK(17, 47);
        break;
    case INDEX_op_ld32s_i64:
        ctx->s_mask = MAKE_64BIT_MASK(32, 32);
        break;
    case INDEX_op_ld32u_i64:
        ctx->z_mask = MAKE_64BIT_MASK(0, 32);
        ctx->s_mask = MAKE_64BIT_MASK(33, 31);
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}
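
/*
 * XOR: xor(x, x) -> 0, xor(x, 0) -> x, and xor(x, -1) -> not(x).
 * The result masks combine as for OR.
 */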
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    ctx->z_mask = arg_info(op->args[1])->z_mask
                | arg_info(op->args[2])->z_mask;
    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return fold_masks(ctx, op);
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, i;
    TCGOp *op, *op_next;
    OptContext ctx = { .tcg = s };

    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        bool done = false;

        /* Calls are special. */
        if (opc == INDEX_op_call) {
            fold_call(&ctx, op);
            continue;
        }

        def = &tcg_op_defs[opc];
        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

        /* Pre-compute the type of the operation. */
        if (def->flags & TCG_OPF_VECTOR) {
            ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
        } else if (def->flags & TCG_OPF_64BIT) {
            ctx.type = TCG_TYPE_I64;
        } else {
            ctx.type = TCG_TYPE_I32;
        }

        /* Assume all bits affected, no bits known zero, no sign reps. */
        ctx.a_mask = -1;
        ctx.z_mask = -1;
        ctx.s_mask = 0;

        /*
         * Process each opcode.
         * Sorted alphabetically by opcode as much as possible.
         */
        switch (opc) {
        CASE_OP_32_64(add):
            done = fold_add(&ctx, op);
            break;
        case INDEX_op_add_vec:
            done = fold_add_vec(&ctx, op);
            break;
        CASE_OP_32_64(add2):
            done = fold_add2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(and):
            done = fold_and(&ctx, op);
            break;
        CASE_OP_32_64_VEC(andc):
            done = fold_andc(&ctx, op);
            break;
        CASE_OP_32_64(brcond):
            done = fold_brcond(&ctx, op);
            break;
        case INDEX_op_brcond2_i32:
            done = fold_brcond2(&ctx, op);
            break;
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
            done = fold_bswap(&ctx, op);
            break;
        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            done = fold_count_zeros(&ctx, op);
            break;
        CASE_OP_32_64(ctpop):
            done = fold_ctpop(&ctx, op);
            break;
        CASE_OP_32_64(deposit):
            done = fold_deposit(&ctx, op);
            break;
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
            done = fold_divide(&ctx, op);
            break;
        case INDEX_op_dup_vec:
            done = fold_dup(&ctx, op);
            break;
        case INDEX_op_dup2_vec:
            done = fold_dup2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(eqv):
            done = fold_eqv(&ctx, op);
            break;
        CASE_OP_32_64(extract):
            done = fold_extract(&ctx, op);
            break;
        CASE_OP_32_64(extract2):
            done = fold_extract2(&ctx, op);
            break;
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext16s):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext_i32_i64:
            done = fold_exts(&ctx, op);
            break;
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32u_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            done = fold_extu(&ctx, op);
            break;
        CASE_OP_32_64(ld8s):
        CASE_OP_32_64(ld8u):
        CASE_OP_32_64(ld16s):
        CASE_OP_32_64(ld16u):
        case INDEX_op_ld32s_i64:
        case INDEX_op_ld32u_i64:
            done = fold_tcg_ld(&ctx, op);
            break;
        case INDEX_op_mb:
            done = fold_mb(&ctx, op);
            break;
        CASE_OP_32_64_VEC(mov):
            done = fold_mov(&ctx, op);
            break;
        CASE_OP_32_64(movcond):
            done = fold_movcond(&ctx, op);
            break;
        CASE_OP_32_64(mul):
            done = fold_mul(&ctx, op);
            break;
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(muluh):
            done = fold_mul_highpart(&ctx, op);
            break;
        CASE_OP_32_64(muls2):
        CASE_OP_32_64(mulu2):
            done = fold_multiply2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(nand):
            done = fold_nand(&ctx, op);
            break;
        CASE_OP_32_64(neg):
            done = fold_neg(&ctx, op);
            break;
        CASE_OP_32_64_VEC(nor):
            done = fold_nor(&ctx, op);
            break;
        CASE_OP_32_64_VEC(not):
            done = fold_not(&ctx, op);
            break;
        CASE_OP_32_64_VEC(or):
            done = fold_or(&ctx, op);
            break;
        CASE_OP_32_64_VEC(orc):
            done = fold_orc(&ctx, op);
            break;
        case INDEX_op_qemu_ld_i32:
        case INDEX_op_qemu_ld_i64:
            done = fold_qemu_ld(&ctx, op);
            break;
        case INDEX_op_qemu_st_i32:
        case INDEX_op_qemu_st8_i32:
        case INDEX_op_qemu_st_i64:
            done = fold_qemu_st(&ctx, op);
            break;
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            done = fold_remainder(&ctx, op);
            break;
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
            done = fold_shift(&ctx, op);
            break;
        CASE_OP_32_64(setcond):
            done = fold_setcond(&ctx, op);
            break;
        case INDEX_op_setcond2_i32:
            done = fold_setcond2(&ctx, op);
            break;
        CASE_OP_32_64(sextract):
            done = fold_sextract(&ctx, op);
            break;
        CASE_OP_32_64(sub):
            done = fold_sub(&ctx, op);
            break;
        case INDEX_op_sub_vec:
            done = fold_sub_vec(&ctx, op);
            break;
        CASE_OP_32_64(sub2):
            done = fold_sub2(&ctx, op);
            break;
        CASE_OP_32_64_VEC(xor):
            done = fold_xor(&ctx, op);
            break;
        default:
            break;
        }

        if (!done) {
            finish_folding(&ctx, op);
        }
    }
}