2
0

optimize.c 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535
  1. /*
  2. * Optimizations for Tiny Code Generator for QEMU
  3. *
  4. * Copyright (c) 2010 Samsung Electronics.
  5. * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "tcg/tcg-op.h"
  /* Expand to the two case labels for the _i32 and _i64 variants of opcode X.
     The colon after the last label is supplied at the point of use, e.g.
     "CASE_OP_32_64(add):". */
  27. #define CASE_OP_32_64(x) \
  28. glue(glue(case INDEX_op_, x), _i32): \
  29. glue(glue(case INDEX_op_, x), _i64)
  /* Expand to the three case labels for the _i32, _i64 and _vec variants of
     opcode X.  The colon after the last label is supplied at the point of
     use, e.g. "CASE_OP_32_64_VEC(add):". */
  30. #define CASE_OP_32_64_VEC(x) \
  31. glue(glue(case INDEX_op_, x), _i32): \
  32. glue(glue(case INDEX_op_, x), _i64): \
  33. glue(glue(case INDEX_op_, x), _vec)
  /* Optimizer bookkeeping attached to one temp (reached via ts->state_ptr).
     Temps known to hold the same value are chained through prev_copy and
     next_copy into a circular doubly linked list; a temp with no copies
     points at itself in both directions. */
  34. struct tcg_temp_info {
  35. bool is_const; /* true when VAL is the temp's known constant value */
  36. TCGTemp *prev_copy; /* previous temp in the circular list of copies */
  37. TCGTemp *next_copy; /* next temp in the list; the temp itself when it has no copies */
  38. tcg_target_ulong val; /* constant value; only meaningful while is_const is set */
  39. tcg_target_ulong mask; /* used by the known-zero-bits pass: a clear bit appears to mean "known zero"; -1 means nothing is known */
  40. };
  41. static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
  42. {
  43. return ts->state_ptr;
  44. }
  45. static inline struct tcg_temp_info *arg_info(TCGArg arg)
  46. {
  47. return ts_info(arg_temp(arg));
  48. }
  49. static inline bool ts_is_const(TCGTemp *ts)
  50. {
  51. return ts_info(ts)->is_const;
  52. }
  53. static inline bool arg_is_const(TCGArg arg)
  54. {
  55. return ts_is_const(arg_temp(arg));
  56. }
  57. static inline bool ts_is_copy(TCGTemp *ts)
  58. {
  59. return ts_info(ts)->next_copy != ts;
  60. }
  61. /* Reset TEMP's state, possibly removing the temp for the list of copies. */
  62. static void reset_ts(TCGTemp *ts)
  63. {
  64. struct tcg_temp_info *ti = ts_info(ts);
  65. struct tcg_temp_info *pi = ts_info(ti->prev_copy);
  66. struct tcg_temp_info *ni = ts_info(ti->next_copy);
  67. ni->prev_copy = ti->prev_copy;
  68. pi->next_copy = ti->next_copy;
  69. ti->next_copy = ts;
  70. ti->prev_copy = ts;
  71. ti->is_const = false;
  72. ti->mask = -1;
  73. }
  74. static void reset_temp(TCGArg arg)
  75. {
  76. reset_ts(arg_temp(arg));
  77. }
  78. /* Initialize and activate a temporary. */
  79. static void init_ts_info(struct tcg_temp_info *infos,
  80. TCGTempSet *temps_used, TCGTemp *ts)
  81. {
  82. size_t idx = temp_idx(ts);
  83. if (!test_bit(idx, temps_used->l)) {
  84. struct tcg_temp_info *ti = &infos[idx];
  85. ts->state_ptr = ti;
  86. ti->next_copy = ts;
  87. ti->prev_copy = ts;
  88. ti->is_const = false;
  89. ti->mask = -1;
  90. set_bit(idx, temps_used->l);
  91. }
  92. }
  93. static void init_arg_info(struct tcg_temp_info *infos,
  94. TCGTempSet *temps_used, TCGArg arg)
  95. {
  96. init_ts_info(infos, temps_used, arg_temp(arg));
  97. }
  98. static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
  99. {
  100. TCGTemp *i;
  101. /* If this is already a global, we can't do better. */
  102. if (ts->temp_global) {
  103. return ts;
  104. }
  105. /* Search for a global first. */
  106. for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
  107. if (i->temp_global) {
  108. return i;
  109. }
  110. }
  111. /* If it is a temp, search for a temp local. */
  112. if (!ts->temp_local) {
  113. for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
  114. if (ts->temp_local) {
  115. return i;
  116. }
  117. }
  118. }
  119. /* Failure to find a better representation, return the same temp. */
  120. return ts;
  121. }
  122. static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
  123. {
  124. TCGTemp *i;
  125. if (ts1 == ts2) {
  126. return true;
  127. }
  128. if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
  129. return false;
  130. }
  131. for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
  132. if (i == ts2) {
  133. return true;
  134. }
  135. }
  136. return false;
  137. }
  138. static bool args_are_copies(TCGArg arg1, TCGArg arg2)
  139. {
  140. return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
  141. }
  142. static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
  143. {
  144. const TCGOpDef *def;
  145. TCGOpcode new_op;
  146. tcg_target_ulong mask;
  147. struct tcg_temp_info *di = arg_info(dst);
  148. def = &tcg_op_defs[op->opc];
  149. if (def->flags & TCG_OPF_VECTOR) {
  150. new_op = INDEX_op_dupi_vec;
  151. } else if (def->flags & TCG_OPF_64BIT) {
  152. new_op = INDEX_op_movi_i64;
  153. } else {
  154. new_op = INDEX_op_movi_i32;
  155. }
  156. op->opc = new_op;
  157. /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
  158. op->args[0] = dst;
  159. op->args[1] = val;
  160. reset_temp(dst);
  161. di->is_const = true;
  162. di->val = val;
  163. mask = val;
  164. if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
  165. /* High bits of the destination are now garbage. */
  166. mask |= ~0xffffffffull;
  167. }
  168. di->mask = mask;
  169. }
  170. static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
  171. {
  172. TCGTemp *dst_ts = arg_temp(dst);
  173. TCGTemp *src_ts = arg_temp(src);
  174. const TCGOpDef *def;
  175. struct tcg_temp_info *di;
  176. struct tcg_temp_info *si;
  177. tcg_target_ulong mask;
  178. TCGOpcode new_op;
  179. if (ts_are_copies(dst_ts, src_ts)) {
  180. tcg_op_remove(s, op);
  181. return;
  182. }
  183. reset_ts(dst_ts);
  184. di = ts_info(dst_ts);
  185. si = ts_info(src_ts);
  186. def = &tcg_op_defs[op->opc];
  187. if (def->flags & TCG_OPF_VECTOR) {
  188. new_op = INDEX_op_mov_vec;
  189. } else if (def->flags & TCG_OPF_64BIT) {
  190. new_op = INDEX_op_mov_i64;
  191. } else {
  192. new_op = INDEX_op_mov_i32;
  193. }
  194. op->opc = new_op;
  195. /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
  196. op->args[0] = dst;
  197. op->args[1] = src;
  198. mask = si->mask;
  199. if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
  200. /* High bits of the destination are now garbage. */
  201. mask |= ~0xffffffffull;
  202. }
  203. di->mask = mask;
  204. if (src_ts->type == dst_ts->type) {
  205. struct tcg_temp_info *ni = ts_info(si->next_copy);
  206. di->next_copy = si->next_copy;
  207. di->prev_copy = src_ts;
  208. ni->prev_copy = dst_ts;
  209. si->next_copy = dst_ts;
  210. di->is_const = si->is_const;
  211. di->val = si->val;
  212. }
  213. }
  214. static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
  215. {
  216. uint64_t l64, h64;
  217. switch (op) {
  218. CASE_OP_32_64(add):
  219. return x + y;
  220. CASE_OP_32_64(sub):
  221. return x - y;
  222. CASE_OP_32_64(mul):
  223. return x * y;
  224. CASE_OP_32_64(and):
  225. return x & y;
  226. CASE_OP_32_64(or):
  227. return x | y;
  228. CASE_OP_32_64(xor):
  229. return x ^ y;
  230. case INDEX_op_shl_i32:
  231. return (uint32_t)x << (y & 31);
  232. case INDEX_op_shl_i64:
  233. return (uint64_t)x << (y & 63);
  234. case INDEX_op_shr_i32:
  235. return (uint32_t)x >> (y & 31);
  236. case INDEX_op_shr_i64:
  237. return (uint64_t)x >> (y & 63);
  238. case INDEX_op_sar_i32:
  239. return (int32_t)x >> (y & 31);
  240. case INDEX_op_sar_i64:
  241. return (int64_t)x >> (y & 63);
  242. case INDEX_op_rotr_i32:
  243. return ror32(x, y & 31);
  244. case INDEX_op_rotr_i64:
  245. return ror64(x, y & 63);
  246. case INDEX_op_rotl_i32:
  247. return rol32(x, y & 31);
  248. case INDEX_op_rotl_i64:
  249. return rol64(x, y & 63);
  250. CASE_OP_32_64(not):
  251. return ~x;
  252. CASE_OP_32_64(neg):
  253. return -x;
  254. CASE_OP_32_64(andc):
  255. return x & ~y;
  256. CASE_OP_32_64(orc):
  257. return x | ~y;
  258. CASE_OP_32_64(eqv):
  259. return ~(x ^ y);
  260. CASE_OP_32_64(nand):
  261. return ~(x & y);
  262. CASE_OP_32_64(nor):
  263. return ~(x | y);
  264. case INDEX_op_clz_i32:
  265. return (uint32_t)x ? clz32(x) : y;
  266. case INDEX_op_clz_i64:
  267. return x ? clz64(x) : y;
  268. case INDEX_op_ctz_i32:
  269. return (uint32_t)x ? ctz32(x) : y;
  270. case INDEX_op_ctz_i64:
  271. return x ? ctz64(x) : y;
  272. case INDEX_op_ctpop_i32:
  273. return ctpop32(x);
  274. case INDEX_op_ctpop_i64:
  275. return ctpop64(x);
  276. CASE_OP_32_64(ext8s):
  277. return (int8_t)x;
  278. CASE_OP_32_64(ext16s):
  279. return (int16_t)x;
  280. CASE_OP_32_64(ext8u):
  281. return (uint8_t)x;
  282. CASE_OP_32_64(ext16u):
  283. return (uint16_t)x;
  284. CASE_OP_32_64(bswap16):
  285. return bswap16(x);
  286. CASE_OP_32_64(bswap32):
  287. return bswap32(x);
  288. case INDEX_op_bswap64_i64:
  289. return bswap64(x);
  290. case INDEX_op_ext_i32_i64:
  291. case INDEX_op_ext32s_i64:
  292. return (int32_t)x;
  293. case INDEX_op_extu_i32_i64:
  294. case INDEX_op_extrl_i64_i32:
  295. case INDEX_op_ext32u_i64:
  296. return (uint32_t)x;
  297. case INDEX_op_extrh_i64_i32:
  298. return (uint64_t)x >> 32;
  299. case INDEX_op_muluh_i32:
  300. return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
  301. case INDEX_op_mulsh_i32:
  302. return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
  303. case INDEX_op_muluh_i64:
  304. mulu64(&l64, &h64, x, y);
  305. return h64;
  306. case INDEX_op_mulsh_i64:
  307. muls64(&l64, &h64, x, y);
  308. return h64;
  309. case INDEX_op_div_i32:
  310. /* Avoid crashing on divide by zero, otherwise undefined. */
  311. return (int32_t)x / ((int32_t)y ? : 1);
  312. case INDEX_op_divu_i32:
  313. return (uint32_t)x / ((uint32_t)y ? : 1);
  314. case INDEX_op_div_i64:
  315. return (int64_t)x / ((int64_t)y ? : 1);
  316. case INDEX_op_divu_i64:
  317. return (uint64_t)x / ((uint64_t)y ? : 1);
  318. case INDEX_op_rem_i32:
  319. return (int32_t)x % ((int32_t)y ? : 1);
  320. case INDEX_op_remu_i32:
  321. return (uint32_t)x % ((uint32_t)y ? : 1);
  322. case INDEX_op_rem_i64:
  323. return (int64_t)x % ((int64_t)y ? : 1);
  324. case INDEX_op_remu_i64:
  325. return (uint64_t)x % ((uint64_t)y ? : 1);
  326. default:
  327. fprintf(stderr,
  328. "Unrecognized operation %d in do_constant_folding.\n", op);
  329. tcg_abort();
  330. }
  331. }
  332. static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
  333. {
  334. const TCGOpDef *def = &tcg_op_defs[op];
  335. TCGArg res = do_constant_folding_2(op, x, y);
  336. if (!(def->flags & TCG_OPF_64BIT)) {
  337. res = (int32_t)res;
  338. }
  339. return res;
  340. }
  341. static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
  342. {
  343. switch (c) {
  344. case TCG_COND_EQ:
  345. return x == y;
  346. case TCG_COND_NE:
  347. return x != y;
  348. case TCG_COND_LT:
  349. return (int32_t)x < (int32_t)y;
  350. case TCG_COND_GE:
  351. return (int32_t)x >= (int32_t)y;
  352. case TCG_COND_LE:
  353. return (int32_t)x <= (int32_t)y;
  354. case TCG_COND_GT:
  355. return (int32_t)x > (int32_t)y;
  356. case TCG_COND_LTU:
  357. return x < y;
  358. case TCG_COND_GEU:
  359. return x >= y;
  360. case TCG_COND_LEU:
  361. return x <= y;
  362. case TCG_COND_GTU:
  363. return x > y;
  364. default:
  365. tcg_abort();
  366. }
  367. }
  368. static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
  369. {
  370. switch (c) {
  371. case TCG_COND_EQ:
  372. return x == y;
  373. case TCG_COND_NE:
  374. return x != y;
  375. case TCG_COND_LT:
  376. return (int64_t)x < (int64_t)y;
  377. case TCG_COND_GE:
  378. return (int64_t)x >= (int64_t)y;
  379. case TCG_COND_LE:
  380. return (int64_t)x <= (int64_t)y;
  381. case TCG_COND_GT:
  382. return (int64_t)x > (int64_t)y;
  383. case TCG_COND_LTU:
  384. return x < y;
  385. case TCG_COND_GEU:
  386. return x >= y;
  387. case TCG_COND_LEU:
  388. return x <= y;
  389. case TCG_COND_GTU:
  390. return x > y;
  391. default:
  392. tcg_abort();
  393. }
  394. }
  395. static bool do_constant_folding_cond_eq(TCGCond c)
  396. {
  397. switch (c) {
  398. case TCG_COND_GT:
  399. case TCG_COND_LTU:
  400. case TCG_COND_LT:
  401. case TCG_COND_GTU:
  402. case TCG_COND_NE:
  403. return 0;
  404. case TCG_COND_GE:
  405. case TCG_COND_GEU:
  406. case TCG_COND_LE:
  407. case TCG_COND_LEU:
  408. case TCG_COND_EQ:
  409. return 1;
  410. default:
  411. tcg_abort();
  412. }
  413. }
  414. /* Return 2 if the condition can't be simplified, and the result
  415. of the condition (0 or 1) if it can */
  416. static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
  417. TCGArg y, TCGCond c)
  418. {
  419. tcg_target_ulong xv = arg_info(x)->val;
  420. tcg_target_ulong yv = arg_info(y)->val;
  421. if (arg_is_const(x) && arg_is_const(y)) {
  422. const TCGOpDef *def = &tcg_op_defs[op];
  423. tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
  424. if (def->flags & TCG_OPF_64BIT) {
  425. return do_constant_folding_cond_64(xv, yv, c);
  426. } else {
  427. return do_constant_folding_cond_32(xv, yv, c);
  428. }
  429. } else if (args_are_copies(x, y)) {
  430. return do_constant_folding_cond_eq(c);
  431. } else if (arg_is_const(y) && yv == 0) {
  432. switch (c) {
  433. case TCG_COND_LTU:
  434. return 0;
  435. case TCG_COND_GEU:
  436. return 1;
  437. default:
  438. return 2;
  439. }
  440. }
  441. return 2;
  442. }
  443. /* Return 2 if the condition can't be simplified, and the result
  444. of the condition (0 or 1) if it can */
  445. static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
  446. {
  447. TCGArg al = p1[0], ah = p1[1];
  448. TCGArg bl = p2[0], bh = p2[1];
  449. if (arg_is_const(bl) && arg_is_const(bh)) {
  450. tcg_target_ulong blv = arg_info(bl)->val;
  451. tcg_target_ulong bhv = arg_info(bh)->val;
  452. uint64_t b = deposit64(blv, 32, 32, bhv);
  453. if (arg_is_const(al) && arg_is_const(ah)) {
  454. tcg_target_ulong alv = arg_info(al)->val;
  455. tcg_target_ulong ahv = arg_info(ah)->val;
  456. uint64_t a = deposit64(alv, 32, 32, ahv);
  457. return do_constant_folding_cond_64(a, b, c);
  458. }
  459. if (b == 0) {
  460. switch (c) {
  461. case TCG_COND_LTU:
  462. return 0;
  463. case TCG_COND_GEU:
  464. return 1;
  465. default:
  466. break;
  467. }
  468. }
  469. }
  470. if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
  471. return do_constant_folding_cond_eq(c);
  472. }
  473. return 2;
  474. }
  475. static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
  476. {
  477. TCGArg a1 = *p1, a2 = *p2;
  478. int sum = 0;
  479. sum += arg_is_const(a1);
  480. sum -= arg_is_const(a2);
  481. /* Prefer the constant in second argument, and then the form
  482. op a, a, b, which is better handled on non-RISC hosts. */
  483. if (sum > 0 || (sum == 0 && dest == a2)) {
  484. *p1 = a2;
  485. *p2 = a1;
  486. return true;
  487. }
  488. return false;
  489. }
  490. static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
  491. {
  492. int sum = 0;
  493. sum += arg_is_const(p1[0]);
  494. sum += arg_is_const(p1[1]);
  495. sum -= arg_is_const(p2[0]);
  496. sum -= arg_is_const(p2[1]);
  497. if (sum > 0) {
  498. TCGArg t;
  499. t = p1[0], p1[0] = p2[0], p2[0] = t;
  500. t = p1[1], p1[1] = p2[1], p2[1] = t;
  501. return true;
  502. }
  503. return false;
  504. }
  505. /* Propagate constants and copies, fold constant expressions. */
  506. void tcg_optimize(TCGContext *s)
  507. {
  508. int nb_temps, nb_globals;
  509. TCGOp *op, *op_next, *prev_mb = NULL;
  510. struct tcg_temp_info *infos;
  511. TCGTempSet temps_used;
  512. /* Array VALS has an element for each temp.
  513. If this temp holds a constant then its value is kept in VALS' element.
  514. If this temp is a copy of other ones then the other copies are
  515. available through the doubly linked circular list. */
  516. nb_temps = s->nb_temps;
  517. nb_globals = s->nb_globals;
  518. bitmap_zero(temps_used.l, nb_temps);
  519. infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
  520. QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
  521. tcg_target_ulong mask, partmask, affected;
  522. int nb_oargs, nb_iargs, i;
  523. TCGArg tmp;
  524. TCGOpcode opc = op->opc;
  525. const TCGOpDef *def = &tcg_op_defs[opc];
  526. /* Count the arguments, and initialize the temps that are
  527. going to be used */
  528. if (opc == INDEX_op_call) {
  529. nb_oargs = TCGOP_CALLO(op);
  530. nb_iargs = TCGOP_CALLI(op);
  531. for (i = 0; i < nb_oargs + nb_iargs; i++) {
  532. TCGTemp *ts = arg_temp(op->args[i]);
  533. if (ts) {
  534. init_ts_info(infos, &temps_used, ts);
  535. }
  536. }
  537. } else {
  538. nb_oargs = def->nb_oargs;
  539. nb_iargs = def->nb_iargs;
  540. for (i = 0; i < nb_oargs + nb_iargs; i++) {
  541. init_arg_info(infos, &temps_used, op->args[i]);
  542. }
  543. }
  544. /* Do copy propagation */
  545. for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
  546. TCGTemp *ts = arg_temp(op->args[i]);
  547. if (ts && ts_is_copy(ts)) {
  548. op->args[i] = temp_arg(find_better_copy(s, ts));
  549. }
  550. }
  551. /* For commutative operations make constant second argument */
  552. switch (opc) {
  553. CASE_OP_32_64_VEC(add):
  554. CASE_OP_32_64_VEC(mul):
  555. CASE_OP_32_64_VEC(and):
  556. CASE_OP_32_64_VEC(or):
  557. CASE_OP_32_64_VEC(xor):
  558. CASE_OP_32_64(eqv):
  559. CASE_OP_32_64(nand):
  560. CASE_OP_32_64(nor):
  561. CASE_OP_32_64(muluh):
  562. CASE_OP_32_64(mulsh):
  563. swap_commutative(op->args[0], &op->args[1], &op->args[2]);
  564. break;
  565. CASE_OP_32_64(brcond):
  566. if (swap_commutative(-1, &op->args[0], &op->args[1])) {
  567. op->args[2] = tcg_swap_cond(op->args[2]);
  568. }
  569. break;
  570. CASE_OP_32_64(setcond):
  571. if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
  572. op->args[3] = tcg_swap_cond(op->args[3]);
  573. }
  574. break;
  575. CASE_OP_32_64(movcond):
  576. if (swap_commutative(-1, &op->args[1], &op->args[2])) {
  577. op->args[5] = tcg_swap_cond(op->args[5]);
  578. }
  579. /* For movcond, we canonicalize the "false" input reg to match
  580. the destination reg so that the tcg backend can implement
  581. a "move if true" operation. */
  582. if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
  583. op->args[5] = tcg_invert_cond(op->args[5]);
  584. }
  585. break;
  586. CASE_OP_32_64(add2):
  587. swap_commutative(op->args[0], &op->args[2], &op->args[4]);
  588. swap_commutative(op->args[1], &op->args[3], &op->args[5]);
  589. break;
  590. CASE_OP_32_64(mulu2):
  591. CASE_OP_32_64(muls2):
  592. swap_commutative(op->args[0], &op->args[2], &op->args[3]);
  593. break;
  594. case INDEX_op_brcond2_i32:
  595. if (swap_commutative2(&op->args[0], &op->args[2])) {
  596. op->args[4] = tcg_swap_cond(op->args[4]);
  597. }
  598. break;
  599. case INDEX_op_setcond2_i32:
  600. if (swap_commutative2(&op->args[1], &op->args[3])) {
  601. op->args[5] = tcg_swap_cond(op->args[5]);
  602. }
  603. break;
  604. default:
  605. break;
  606. }
  607. /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
  608. and "sub r, 0, a => neg r, a" case. */
  609. switch (opc) {
  610. CASE_OP_32_64(shl):
  611. CASE_OP_32_64(shr):
  612. CASE_OP_32_64(sar):
  613. CASE_OP_32_64(rotl):
  614. CASE_OP_32_64(rotr):
  615. if (arg_is_const(op->args[1])
  616. && arg_info(op->args[1])->val == 0) {
  617. tcg_opt_gen_movi(s, op, op->args[0], 0);
  618. continue;
  619. }
  620. break;
  621. CASE_OP_32_64_VEC(sub):
  622. {
  623. TCGOpcode neg_op;
  624. bool have_neg;
  625. if (arg_is_const(op->args[2])) {
  626. /* Proceed with possible constant folding. */
  627. break;
  628. }
  629. if (opc == INDEX_op_sub_i32) {
  630. neg_op = INDEX_op_neg_i32;
  631. have_neg = TCG_TARGET_HAS_neg_i32;
  632. } else if (opc == INDEX_op_sub_i64) {
  633. neg_op = INDEX_op_neg_i64;
  634. have_neg = TCG_TARGET_HAS_neg_i64;
  635. } else if (TCG_TARGET_HAS_neg_vec) {
  636. TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
  637. unsigned vece = TCGOP_VECE(op);
  638. neg_op = INDEX_op_neg_vec;
  639. have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
  640. } else {
  641. break;
  642. }
  643. if (!have_neg) {
  644. break;
  645. }
  646. if (arg_is_const(op->args[1])
  647. && arg_info(op->args[1])->val == 0) {
  648. op->opc = neg_op;
  649. reset_temp(op->args[0]);
  650. op->args[1] = op->args[2];
  651. continue;
  652. }
  653. }
  654. break;
  655. CASE_OP_32_64_VEC(xor):
  656. CASE_OP_32_64(nand):
  657. if (!arg_is_const(op->args[1])
  658. && arg_is_const(op->args[2])
  659. && arg_info(op->args[2])->val == -1) {
  660. i = 1;
  661. goto try_not;
  662. }
  663. break;
  664. CASE_OP_32_64(nor):
  665. if (!arg_is_const(op->args[1])
  666. && arg_is_const(op->args[2])
  667. && arg_info(op->args[2])->val == 0) {
  668. i = 1;
  669. goto try_not;
  670. }
  671. break;
  672. CASE_OP_32_64_VEC(andc):
  673. if (!arg_is_const(op->args[2])
  674. && arg_is_const(op->args[1])
  675. && arg_info(op->args[1])->val == -1) {
  676. i = 2;
  677. goto try_not;
  678. }
  679. break;
  680. CASE_OP_32_64_VEC(orc):
  681. CASE_OP_32_64(eqv):
  682. if (!arg_is_const(op->args[2])
  683. && arg_is_const(op->args[1])
  684. && arg_info(op->args[1])->val == 0) {
  685. i = 2;
  686. goto try_not;
  687. }
  688. break;
  689. try_not:
  690. {
  691. TCGOpcode not_op;
  692. bool have_not;
  693. if (def->flags & TCG_OPF_VECTOR) {
  694. not_op = INDEX_op_not_vec;
  695. have_not = TCG_TARGET_HAS_not_vec;
  696. } else if (def->flags & TCG_OPF_64BIT) {
  697. not_op = INDEX_op_not_i64;
  698. have_not = TCG_TARGET_HAS_not_i64;
  699. } else {
  700. not_op = INDEX_op_not_i32;
  701. have_not = TCG_TARGET_HAS_not_i32;
  702. }
  703. if (!have_not) {
  704. break;
  705. }
  706. op->opc = not_op;
  707. reset_temp(op->args[0]);
  708. op->args[1] = op->args[i];
  709. continue;
  710. }
  711. default:
  712. break;
  713. }
  714. /* Simplify expression for "op r, a, const => mov r, a" cases */
  715. switch (opc) {
  716. CASE_OP_32_64_VEC(add):
  717. CASE_OP_32_64_VEC(sub):
  718. CASE_OP_32_64_VEC(or):
  719. CASE_OP_32_64_VEC(xor):
  720. CASE_OP_32_64_VEC(andc):
  721. CASE_OP_32_64(shl):
  722. CASE_OP_32_64(shr):
  723. CASE_OP_32_64(sar):
  724. CASE_OP_32_64(rotl):
  725. CASE_OP_32_64(rotr):
  726. if (!arg_is_const(op->args[1])
  727. && arg_is_const(op->args[2])
  728. && arg_info(op->args[2])->val == 0) {
  729. tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
  730. continue;
  731. }
  732. break;
  733. CASE_OP_32_64_VEC(and):
  734. CASE_OP_32_64_VEC(orc):
  735. CASE_OP_32_64(eqv):
  736. if (!arg_is_const(op->args[1])
  737. && arg_is_const(op->args[2])
  738. && arg_info(op->args[2])->val == -1) {
  739. tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
  740. continue;
  741. }
  742. break;
  743. default:
  744. break;
  745. }
  746. /* Simplify using known-zero bits. Currently only ops with a single
  747. output argument is supported. */
  748. mask = -1;
  749. affected = -1;
  750. switch (opc) {
  751. CASE_OP_32_64(ext8s):
  752. if ((arg_info(op->args[1])->mask & 0x80) != 0) {
  753. break;
  754. }
  755. CASE_OP_32_64(ext8u):
  756. mask = 0xff;
  757. goto and_const;
  758. CASE_OP_32_64(ext16s):
  759. if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
  760. break;
  761. }
  762. CASE_OP_32_64(ext16u):
  763. mask = 0xffff;
  764. goto and_const;
  765. case INDEX_op_ext32s_i64:
  766. if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
  767. break;
  768. }
  769. case INDEX_op_ext32u_i64:
  770. mask = 0xffffffffU;
  771. goto and_const;
  772. CASE_OP_32_64(and):
  773. mask = arg_info(op->args[2])->mask;
  774. if (arg_is_const(op->args[2])) {
  775. and_const:
  776. affected = arg_info(op->args[1])->mask & ~mask;
  777. }
  778. mask = arg_info(op->args[1])->mask & mask;
  779. break;
  780. case INDEX_op_ext_i32_i64:
  781. if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
  782. break;
  783. }
  784. case INDEX_op_extu_i32_i64:
  785. /* We do not compute affected as it is a size changing op. */
  786. mask = (uint32_t)arg_info(op->args[1])->mask;
  787. break;
  788. CASE_OP_32_64(andc):
  789. /* Known-zeros does not imply known-ones. Therefore unless
  790. op->args[2] is constant, we can't infer anything from it. */
  791. if (arg_is_const(op->args[2])) {
  792. mask = ~arg_info(op->args[2])->mask;
  793. goto and_const;
  794. }
  795. /* But we certainly know nothing outside args[1] may be set. */
  796. mask = arg_info(op->args[1])->mask;
  797. break;
  798. case INDEX_op_sar_i32:
  799. if (arg_is_const(op->args[2])) {
  800. tmp = arg_info(op->args[2])->val & 31;
  801. mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
  802. }
  803. break;
  804. case INDEX_op_sar_i64:
  805. if (arg_is_const(op->args[2])) {
  806. tmp = arg_info(op->args[2])->val & 63;
  807. mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
  808. }
  809. break;
  810. case INDEX_op_shr_i32:
  811. if (arg_is_const(op->args[2])) {
  812. tmp = arg_info(op->args[2])->val & 31;
  813. mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
  814. }
  815. break;
  816. case INDEX_op_shr_i64:
  817. if (arg_is_const(op->args[2])) {
  818. tmp = arg_info(op->args[2])->val & 63;
  819. mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
  820. }
  821. break;
  822. case INDEX_op_extrl_i64_i32:
  823. mask = (uint32_t)arg_info(op->args[1])->mask;
  824. break;
  825. case INDEX_op_extrh_i64_i32:
  826. mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
  827. break;
  828. CASE_OP_32_64(shl):
  829. if (arg_is_const(op->args[2])) {
  830. tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
  831. mask = arg_info(op->args[1])->mask << tmp;
  832. }
  833. break;
  834. CASE_OP_32_64(neg):
  835. /* Set to 1 all bits to the left of the rightmost. */
  836. mask = -(arg_info(op->args[1])->mask
  837. & -arg_info(op->args[1])->mask);
  838. break;
  839. CASE_OP_32_64(deposit):
  840. mask = deposit64(arg_info(op->args[1])->mask,
  841. op->args[3], op->args[4],
  842. arg_info(op->args[2])->mask);
  843. break;
  844. CASE_OP_32_64(extract):
  845. mask = extract64(arg_info(op->args[1])->mask,
  846. op->args[2], op->args[3]);
  847. if (op->args[2] == 0) {
  848. affected = arg_info(op->args[1])->mask & ~mask;
  849. }
  850. break;
  851. CASE_OP_32_64(sextract):
  852. mask = sextract64(arg_info(op->args[1])->mask,
  853. op->args[2], op->args[3]);
  854. if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
  855. affected = arg_info(op->args[1])->mask & ~mask;
  856. }
  857. break;
  858. CASE_OP_32_64(or):
  859. CASE_OP_32_64(xor):
  860. mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
  861. break;
  862. case INDEX_op_clz_i32:
  863. case INDEX_op_ctz_i32:
  864. mask = arg_info(op->args[2])->mask | 31;
  865. break;
  866. case INDEX_op_clz_i64:
  867. case INDEX_op_ctz_i64:
  868. mask = arg_info(op->args[2])->mask | 63;
  869. break;
  870. case INDEX_op_ctpop_i32:
  871. mask = 32 | 31;
  872. break;
  873. case INDEX_op_ctpop_i64:
  874. mask = 64 | 63;
  875. break;
  876. CASE_OP_32_64(setcond):
  877. case INDEX_op_setcond2_i32:
  878. mask = 1;
  879. break;
  880. CASE_OP_32_64(movcond):
  881. mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
  882. break;
  883. CASE_OP_32_64(ld8u):
  884. mask = 0xff;
  885. break;
  886. CASE_OP_32_64(ld16u):
  887. mask = 0xffff;
  888. break;
  889. case INDEX_op_ld32u_i64:
  890. mask = 0xffffffffu;
  891. break;
  892. CASE_OP_32_64(qemu_ld):
  893. {
  894. TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
  895. MemOp mop = get_memop(oi);
  896. if (!(mop & MO_SIGN)) {
  897. mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
  898. }
  899. }
  900. break;
  901. default:
  902. break;
  903. }
  904. /* 32-bit ops generate 32-bit results. For the result is zero test
  905. below, we can ignore high bits, but for further optimizations we
  906. need to record that the high bits contain garbage. */
  907. partmask = mask;
  908. if (!(def->flags & TCG_OPF_64BIT)) {
  909. mask |= ~(tcg_target_ulong)0xffffffffu;
  910. partmask &= 0xffffffffu;
  911. affected &= 0xffffffffu;
  912. }
  913. if (partmask == 0) {
  914. tcg_debug_assert(nb_oargs == 1);
  915. tcg_opt_gen_movi(s, op, op->args[0], 0);
  916. continue;
  917. }
  918. if (affected == 0) {
  919. tcg_debug_assert(nb_oargs == 1);
  920. tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
  921. continue;
  922. }
  923. /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
  924. switch (opc) {
  925. CASE_OP_32_64_VEC(and):
  926. CASE_OP_32_64_VEC(mul):
  927. CASE_OP_32_64(muluh):
  928. CASE_OP_32_64(mulsh):
  929. if (arg_is_const(op->args[2])
  930. && arg_info(op->args[2])->val == 0) {
  931. tcg_opt_gen_movi(s, op, op->args[0], 0);
  932. continue;
  933. }
  934. break;
  935. default:
  936. break;
  937. }
  938. /* Simplify expression for "op r, a, a => mov r, a" cases */
  939. switch (opc) {
  940. CASE_OP_32_64_VEC(or):
  941. CASE_OP_32_64_VEC(and):
  942. if (args_are_copies(op->args[1], op->args[2])) {
  943. tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
  944. continue;
  945. }
  946. break;
  947. default:
  948. break;
  949. }
  950. /* Simplify expression for "op r, a, a => movi r, 0" cases */
  951. switch (opc) {
  952. CASE_OP_32_64_VEC(andc):
  953. CASE_OP_32_64_VEC(sub):
  954. CASE_OP_32_64_VEC(xor):
  955. if (args_are_copies(op->args[1], op->args[2])) {
  956. tcg_opt_gen_movi(s, op, op->args[0], 0);
  957. continue;
  958. }
  959. break;
  960. default:
  961. break;
  962. }
  963. /* Propagate constants through copy operations and do constant
  964. folding. Constants will be substituted to arguments by register
  965. allocator where needed and possible. Also detect copies. */
  966. switch (opc) {
  967. CASE_OP_32_64_VEC(mov):
  968. tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
  969. break;
  970. CASE_OP_32_64(movi):
  971. case INDEX_op_dupi_vec:
  972. tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
  973. break;
  974. case INDEX_op_dup_vec:
  975. if (arg_is_const(op->args[1])) {
  976. tmp = arg_info(op->args[1])->val;
  977. tmp = dup_const(TCGOP_VECE(op), tmp);
  978. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  979. break;
  980. }
  981. goto do_default;
  982. CASE_OP_32_64(not):
  983. CASE_OP_32_64(neg):
  984. CASE_OP_32_64(ext8s):
  985. CASE_OP_32_64(ext8u):
  986. CASE_OP_32_64(ext16s):
  987. CASE_OP_32_64(ext16u):
  988. CASE_OP_32_64(ctpop):
  989. CASE_OP_32_64(bswap16):
  990. CASE_OP_32_64(bswap32):
  991. case INDEX_op_bswap64_i64:
  992. case INDEX_op_ext32s_i64:
  993. case INDEX_op_ext32u_i64:
  994. case INDEX_op_ext_i32_i64:
  995. case INDEX_op_extu_i32_i64:
  996. case INDEX_op_extrl_i64_i32:
  997. case INDEX_op_extrh_i64_i32:
  998. if (arg_is_const(op->args[1])) {
  999. tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
  1000. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1001. break;
  1002. }
  1003. goto do_default;
  1004. CASE_OP_32_64(add):
  1005. CASE_OP_32_64(sub):
  1006. CASE_OP_32_64(mul):
  1007. CASE_OP_32_64(or):
  1008. CASE_OP_32_64(and):
  1009. CASE_OP_32_64(xor):
  1010. CASE_OP_32_64(shl):
  1011. CASE_OP_32_64(shr):
  1012. CASE_OP_32_64(sar):
  1013. CASE_OP_32_64(rotl):
  1014. CASE_OP_32_64(rotr):
  1015. CASE_OP_32_64(andc):
  1016. CASE_OP_32_64(orc):
  1017. CASE_OP_32_64(eqv):
  1018. CASE_OP_32_64(nand):
  1019. CASE_OP_32_64(nor):
  1020. CASE_OP_32_64(muluh):
  1021. CASE_OP_32_64(mulsh):
  1022. CASE_OP_32_64(div):
  1023. CASE_OP_32_64(divu):
  1024. CASE_OP_32_64(rem):
  1025. CASE_OP_32_64(remu):
  1026. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  1027. tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
  1028. arg_info(op->args[2])->val);
  1029. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1030. break;
  1031. }
  1032. goto do_default;
  1033. CASE_OP_32_64(clz):
  1034. CASE_OP_32_64(ctz):
  1035. if (arg_is_const(op->args[1])) {
  1036. TCGArg v = arg_info(op->args[1])->val;
  1037. if (v != 0) {
  1038. tmp = do_constant_folding(opc, v, 0);
  1039. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1040. } else {
  1041. tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
  1042. }
  1043. break;
  1044. }
  1045. goto do_default;
  1046. CASE_OP_32_64(deposit):
  1047. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  1048. tmp = deposit64(arg_info(op->args[1])->val,
  1049. op->args[3], op->args[4],
  1050. arg_info(op->args[2])->val);
  1051. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1052. break;
  1053. }
  1054. goto do_default;
  1055. CASE_OP_32_64(extract):
  1056. if (arg_is_const(op->args[1])) {
  1057. tmp = extract64(arg_info(op->args[1])->val,
  1058. op->args[2], op->args[3]);
  1059. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1060. break;
  1061. }
  1062. goto do_default;
  1063. CASE_OP_32_64(sextract):
  1064. if (arg_is_const(op->args[1])) {
  1065. tmp = sextract64(arg_info(op->args[1])->val,
  1066. op->args[2], op->args[3]);
  1067. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1068. break;
  1069. }
  1070. goto do_default;
  1071. CASE_OP_32_64(extract2):
  1072. if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
  1073. TCGArg v1 = arg_info(op->args[1])->val;
  1074. TCGArg v2 = arg_info(op->args[2])->val;
  1075. if (opc == INDEX_op_extract2_i64) {
  1076. tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
  1077. } else {
  1078. tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
  1079. ((uint32_t)v2 << (32 - op->args[3])));
  1080. }
  1081. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1082. break;
  1083. }
  1084. goto do_default;
  1085. CASE_OP_32_64(setcond):
  1086. tmp = do_constant_folding_cond(opc, op->args[1],
  1087. op->args[2], op->args[3]);
  1088. if (tmp != 2) {
  1089. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1090. break;
  1091. }
  1092. goto do_default;
  1093. CASE_OP_32_64(brcond):
  1094. tmp = do_constant_folding_cond(opc, op->args[0],
  1095. op->args[1], op->args[2]);
  1096. if (tmp != 2) {
  1097. if (tmp) {
  1098. bitmap_zero(temps_used.l, nb_temps);
  1099. op->opc = INDEX_op_br;
  1100. op->args[0] = op->args[3];
  1101. } else {
  1102. tcg_op_remove(s, op);
  1103. }
  1104. break;
  1105. }
  1106. goto do_default;
  1107. CASE_OP_32_64(movcond):
  1108. tmp = do_constant_folding_cond(opc, op->args[1],
  1109. op->args[2], op->args[5]);
  1110. if (tmp != 2) {
  1111. tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
  1112. break;
  1113. }
  1114. if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
  1115. tcg_target_ulong tv = arg_info(op->args[3])->val;
  1116. tcg_target_ulong fv = arg_info(op->args[4])->val;
  1117. TCGCond cond = op->args[5];
  1118. if (fv == 1 && tv == 0) {
  1119. cond = tcg_invert_cond(cond);
  1120. } else if (!(tv == 1 && fv == 0)) {
  1121. goto do_default;
  1122. }
  1123. op->args[3] = cond;
  1124. op->opc = opc = (opc == INDEX_op_movcond_i32
  1125. ? INDEX_op_setcond_i32
  1126. : INDEX_op_setcond_i64);
  1127. nb_iargs = 2;
  1128. }
  1129. goto do_default;
  1130. case INDEX_op_add2_i32:
  1131. case INDEX_op_sub2_i32:
  1132. if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
  1133. && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
  1134. uint32_t al = arg_info(op->args[2])->val;
  1135. uint32_t ah = arg_info(op->args[3])->val;
  1136. uint32_t bl = arg_info(op->args[4])->val;
  1137. uint32_t bh = arg_info(op->args[5])->val;
  1138. uint64_t a = ((uint64_t)ah << 32) | al;
  1139. uint64_t b = ((uint64_t)bh << 32) | bl;
  1140. TCGArg rl, rh;
  1141. TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
  1142. if (opc == INDEX_op_add2_i32) {
  1143. a += b;
  1144. } else {
  1145. a -= b;
  1146. }
  1147. rl = op->args[0];
  1148. rh = op->args[1];
  1149. tcg_opt_gen_movi(s, op, rl, (int32_t)a);
  1150. tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
  1151. break;
  1152. }
  1153. goto do_default;
  1154. case INDEX_op_mulu2_i32:
  1155. if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
  1156. uint32_t a = arg_info(op->args[2])->val;
  1157. uint32_t b = arg_info(op->args[3])->val;
  1158. uint64_t r = (uint64_t)a * b;
  1159. TCGArg rl, rh;
  1160. TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
  1161. rl = op->args[0];
  1162. rh = op->args[1];
  1163. tcg_opt_gen_movi(s, op, rl, (int32_t)r);
  1164. tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
  1165. break;
  1166. }
  1167. goto do_default;
  1168. case INDEX_op_brcond2_i32:
  1169. tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
  1170. op->args[4]);
  1171. if (tmp != 2) {
  1172. if (tmp) {
  1173. do_brcond_true:
  1174. bitmap_zero(temps_used.l, nb_temps);
  1175. op->opc = INDEX_op_br;
  1176. op->args[0] = op->args[5];
  1177. } else {
  1178. do_brcond_false:
  1179. tcg_op_remove(s, op);
  1180. }
  1181. } else if ((op->args[4] == TCG_COND_LT
  1182. || op->args[4] == TCG_COND_GE)
  1183. && arg_is_const(op->args[2])
  1184. && arg_info(op->args[2])->val == 0
  1185. && arg_is_const(op->args[3])
  1186. && arg_info(op->args[3])->val == 0) {
  1187. /* Simplify LT/GE comparisons vs zero to a single compare
  1188. vs the high word of the input. */
  1189. do_brcond_high:
  1190. bitmap_zero(temps_used.l, nb_temps);
  1191. op->opc = INDEX_op_brcond_i32;
  1192. op->args[0] = op->args[1];
  1193. op->args[1] = op->args[3];
  1194. op->args[2] = op->args[4];
  1195. op->args[3] = op->args[5];
  1196. } else if (op->args[4] == TCG_COND_EQ) {
  1197. /* Simplify EQ comparisons where one of the pairs
  1198. can be simplified. */
  1199. tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
  1200. op->args[0], op->args[2],
  1201. TCG_COND_EQ);
  1202. if (tmp == 0) {
  1203. goto do_brcond_false;
  1204. } else if (tmp == 1) {
  1205. goto do_brcond_high;
  1206. }
  1207. tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
  1208. op->args[1], op->args[3],
  1209. TCG_COND_EQ);
  1210. if (tmp == 0) {
  1211. goto do_brcond_false;
  1212. } else if (tmp != 1) {
  1213. goto do_default;
  1214. }
  1215. do_brcond_low:
  1216. bitmap_zero(temps_used.l, nb_temps);
  1217. op->opc = INDEX_op_brcond_i32;
  1218. op->args[1] = op->args[2];
  1219. op->args[2] = op->args[4];
  1220. op->args[3] = op->args[5];
  1221. } else if (op->args[4] == TCG_COND_NE) {
  1222. /* Simplify NE comparisons where one of the pairs
  1223. can be simplified. */
  1224. tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
  1225. op->args[0], op->args[2],
  1226. TCG_COND_NE);
  1227. if (tmp == 0) {
  1228. goto do_brcond_high;
  1229. } else if (tmp == 1) {
  1230. goto do_brcond_true;
  1231. }
  1232. tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
  1233. op->args[1], op->args[3],
  1234. TCG_COND_NE);
  1235. if (tmp == 0) {
  1236. goto do_brcond_low;
  1237. } else if (tmp == 1) {
  1238. goto do_brcond_true;
  1239. }
  1240. goto do_default;
  1241. } else {
  1242. goto do_default;
  1243. }
  1244. break;
  1245. case INDEX_op_setcond2_i32:
  1246. tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
  1247. op->args[5]);
  1248. if (tmp != 2) {
  1249. do_setcond_const:
  1250. tcg_opt_gen_movi(s, op, op->args[0], tmp);
  1251. } else if ((op->args[5] == TCG_COND_LT
  1252. || op->args[5] == TCG_COND_GE)
  1253. && arg_is_const(op->args[3])
  1254. && arg_info(op->args[3])->val == 0
  1255. && arg_is_const(op->args[4])
  1256. && arg_info(op->args[4])->val == 0) {
  1257. /* Simplify LT/GE comparisons vs zero to a single compare
  1258. vs the high word of the input. */
  1259. do_setcond_high:
  1260. reset_temp(op->args[0]);
  1261. arg_info(op->args[0])->mask = 1;
  1262. op->opc = INDEX_op_setcond_i32;
  1263. op->args[1] = op->args[2];
  1264. op->args[2] = op->args[4];
  1265. op->args[3] = op->args[5];
  1266. } else if (op->args[5] == TCG_COND_EQ) {
  1267. /* Simplify EQ comparisons where one of the pairs
  1268. can be simplified. */
  1269. tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
  1270. op->args[1], op->args[3],
  1271. TCG_COND_EQ);
  1272. if (tmp == 0) {
  1273. goto do_setcond_const;
  1274. } else if (tmp == 1) {
  1275. goto do_setcond_high;
  1276. }
  1277. tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
  1278. op->args[2], op->args[4],
  1279. TCG_COND_EQ);
  1280. if (tmp == 0) {
  1281. goto do_setcond_high;
  1282. } else if (tmp != 1) {
  1283. goto do_default;
  1284. }
  1285. do_setcond_low:
  1286. reset_temp(op->args[0]);
  1287. arg_info(op->args[0])->mask = 1;
  1288. op->opc = INDEX_op_setcond_i32;
  1289. op->args[2] = op->args[3];
  1290. op->args[3] = op->args[5];
  1291. } else if (op->args[5] == TCG_COND_NE) {
  1292. /* Simplify NE comparisons where one of the pairs
  1293. can be simplified. */
  1294. tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
  1295. op->args[1], op->args[3],
  1296. TCG_COND_NE);
  1297. if (tmp == 0) {
  1298. goto do_setcond_high;
  1299. } else if (tmp == 1) {
  1300. goto do_setcond_const;
  1301. }
  1302. tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
  1303. op->args[2], op->args[4],
  1304. TCG_COND_NE);
  1305. if (tmp == 0) {
  1306. goto do_setcond_low;
  1307. } else if (tmp == 1) {
  1308. goto do_setcond_const;
  1309. }
  1310. goto do_default;
  1311. } else {
  1312. goto do_default;
  1313. }
  1314. break;
  1315. case INDEX_op_call:
  1316. if (!(op->args[nb_oargs + nb_iargs + 1]
  1317. & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
  1318. for (i = 0; i < nb_globals; i++) {
  1319. if (test_bit(i, temps_used.l)) {
  1320. reset_ts(&s->temps[i]);
  1321. }
  1322. }
  1323. }
  1324. goto do_reset_output;
  1325. default:
  1326. do_default:
  1327. /* Default case: we know nothing about operation (or were unable
  1328. to compute the operation result) so no propagation is done.
  1329. We trash everything if the operation is the end of a basic
  1330. block, otherwise we only trash the output args. "mask" is
  1331. the non-zero bits mask for the first output arg. */
  1332. if (def->flags & TCG_OPF_BB_END) {
  1333. bitmap_zero(temps_used.l, nb_temps);
  1334. } else {
  1335. do_reset_output:
  1336. for (i = 0; i < nb_oargs; i++) {
  1337. reset_temp(op->args[i]);
  1338. /* Save the corresponding known-zero bits mask for the
  1339. first output argument (only one supported so far). */
  1340. if (i == 0) {
  1341. arg_info(op->args[i])->mask = mask;
  1342. }
  1343. }
  1344. }
  1345. break;
  1346. }
  1347. /* Eliminate duplicate and redundant fence instructions. */
  1348. if (prev_mb) {
  1349. switch (opc) {
  1350. case INDEX_op_mb:
  1351. /* Merge two barriers of the same type into one,
  1352. * or a weaker barrier into a stronger one,
  1353. * or two weaker barriers into a stronger one.
  1354. * mb X; mb Y => mb X|Y
  1355. * mb; strl => mb; st
  1356. * ldaq; mb => ld; mb
  1357. * ldaq; strl => ld; mb; st
  1358. * Other combinations are also merged into a strong
  1359. * barrier. This is stricter than specified but for
  1360. * the purposes of TCG is better than not optimizing.
  1361. */
  1362. prev_mb->args[0] |= op->args[0];
  1363. tcg_op_remove(s, op);
  1364. break;
  1365. default:
  1366. /* Opcodes that end the block stop the optimization. */
  1367. if ((def->flags & TCG_OPF_BB_END) == 0) {
  1368. break;
  1369. }
  1370. /* fallthru */
  1371. case INDEX_op_qemu_ld_i32:
  1372. case INDEX_op_qemu_ld_i64:
  1373. case INDEX_op_qemu_st_i32:
  1374. case INDEX_op_qemu_st_i64:
  1375. case INDEX_op_call:
  1376. /* Opcodes that touch guest memory stop the optimization. */
  1377. prev_mb = NULL;
  1378. break;
  1379. }
  1380. } else if (opc == INDEX_op_mb) {
  1381. prev_mb = op;
  1382. }
  1383. }
  1384. }