tcg-op-vec.c

/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
#include "tcg-internal.h"
#include "tcg-has.h"

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */

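/*
 * For illustration, a front end might declare and install such a list
 * around its expansion like this (a hypothetical sketch; the list name
 * and the opcodes chosen are examples only):
 *
 *     static const TCGOpcode vecop_list_example[] = {
 *         INDEX_op_neg_vec, INDEX_op_smax_vec, 0
 *     };
 *
 *     const TCGOpcode *hold = tcg_swap_vecop_list(vecop_list_example);
 *     ... emit tcg_gen_neg_vec / tcg_gen_smax_vec here ...
 *     tcg_swap_vecop_list(hold);
 */
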
static void tcg_assert_listed_vecop(TCGOpcode op)
{
#ifdef CONFIG_DEBUG_TCG
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
#endif
}

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed. */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above. */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_usadd_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_ussub_vec:
            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}

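/*
 * Usage sketch: a gvec expander supplies its opcode list via the
 * .opt_opc member of its GVecGen* structure, e.g. (the callback and
 * helper names below are hypothetical):
 *
 *     static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 };
 *     static const GVecGen2 g = {
 *         .fniv = gen_neg_vec_cb,      -- per-element vector callback
 *         .fno = gen_helper_gvec_neg,  -- out-of-line fallback helper
 *         .opt_opc = vecop_list,
 *         .vece = MO_32,
 *     };
 *
 * The gvec front end then calls tcg_can_emit_vecop_list() on .opt_opc
 * to decide whether the .fniv hook may be used.
 */
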
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc, 2);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc, 3);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc, 4);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
               TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc, 6);
    TCGOP_TYPE(op) = type;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output. */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output. */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nand_vec) {
        vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
    } else {
        tcg_gen_and_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_nor_vec) {
        vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
    } else {
        tcg_gen_or_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_eqv_vec) {
        vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
    } else {
        tcg_gen_xor_vec(0, r, a, b);
        tcg_gen_not_vec(0, r, r);
    }
}

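/*
 * Note on the tcg_can_emit_vec_op() result used below: a positive
 * value means the host supports the opcode directly, a negative value
 * means the backend can expand it via tcg_expand_vec_op(), and zero
 * means it is unsupported, in which case we fall back to a generic
 * expansion here (or report failure to the caller).
 */
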
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (TCG_TARGET_HAS_not_vec) {
        vec_gen_op2(INDEX_op_not_vec, 0, r, a);
    } else {
        tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1));
    }
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        tcg_gen_sub_vec(vece, r, tcg_constant_vec_matching(r, vece, 0), a);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            /* abs(a) = smax(a, -a) */
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            /* Form t = -1 for negative elements, 0 otherwise... */
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            /* ... then abs(a) = (a ^ t) - t. */
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /*
         * We leave the choice of expansion via scalar or vector shift
         * to the target.  Often, but not always, dupi can feed a vector
         * shift more easily than a scalar.
         */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}

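/*
 * Worked example for the rotation above: rotating right by i is the
 * same as rotating left by (bits - i) mod bits.  For vece = MO_32
 * (32-bit elements), rotri by 9 is emitted as rotli by -9 & 31 = 23,
 * and i = 0 reduces to rotli by 0, which do_shifti turns into a mov.
 */
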
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *tt = NULL;
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ti;
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);

    if (!TCG_TARGET_HAS_tst_vec && is_tst_cond(cond)) {
        /* Lower TSTEQ/TSTNE to (a & b) compared against zero. */
        tt = tcg_temp_new_internal(type, TEMP_EBB);
        ti = temp_arg(tt);
        vec_gen_3(INDEX_op_and_vec, type, 0, ti, ai, bi);
        at = tt;
        ai = ti;
        bt = tcg_constant_internal(type, 0);
        bi = temp_arg(bt);
        cond = tcg_tst_eqne_cond(cond);
    }

    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }

    if (tt) {
        tcg_temp_free_internal(tt);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* usadd(a, b) = min(a, ~b) + b */
        tcg_gen_not_vec(vece, t, b);
        tcg_gen_umin_vec(vece, t, t, a);
        tcg_gen_add_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

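/*
 * The identity above works because min(a, ~b) clamps a to the largest
 * value that can be added to b without overflow.  E.g. at MO_8,
 * usadd(200, 100): min(200, ~100 = 155) = 155, and 155 + 100 = 255,
 * the saturated result; usadd(10, 20): min(10, 235) = 10, 10 + 20 = 30.
 */
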
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* ussub(a, b) = max(a, b) - b */
        tcg_gen_umax_vec(vece, t, a, b);
        tcg_gen_sub_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}

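/*
 * Likewise, max(a, b) - b clamps the difference at zero.  E.g. at MO_8,
 * ussub(10, 20): max(10, 20) - 20 = 0, the saturated result;
 * ussub(30, 20): max(30, 20) - 20 = 10.
 */
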
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

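/*
 * For reference, bitsel computes r = (b & a) | (c & ~a) bitwise: each
 * result bit comes from b where the corresponding bit of a is set, and
 * from c where it is clear.  The fallback above computes exactly this
 * with and/andc/or, which is why cmpsel below can be built from cmp
 * followed by bitsel.
 */
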
void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);

    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}