  1. /*
  2. * Tiny Code Generator for QEMU
  3. *
  4. * Copyright (c) 2018 Linaro, Inc.
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #include "qemu/osdep.h"
  20. #include "tcg/tcg.h"
  21. #include "tcg/tcg-temp-internal.h"
  22. #include "tcg/tcg-op.h"
  23. #include "tcg/tcg-mo.h"
  24. #include "tcg-internal.h"
/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
/*
 * On 64-bit hosts the 32-bit half accessors must never be reachable;
 * redirect any accidental use to an undefined symbol so the build
 * fails at link time instead of miscompiling.
 */
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
#ifdef CONFIG_DEBUG_TCG
/*
 * Debug-only check: if a vecop list is currently installed
 * (tcg_ctx->vecop_list is non-NULL), assert that OP appears in it.
 * When no list is installed, any opcode is permitted.
 */
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;

    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif
/*
 * Return true if every opcode in LIST can be emitted for the given
 * vector TYPE and element size VECE, either directly by the host or
 * via one of the generic fallback expansions implemented below.
 * A NULL list places no constraint and always succeeds.
 */
bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            /* tcg_gen_neg_vec falls back to subtraction from zero.  */
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            /* tcg_gen_abs_vec needs sub plus one of smax/sari/cmp.  */
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_usadd_vec:
            /* Fallback uses umin, itself expandable via cmp.  */
            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_ussub_vec:
            /* Fallback uses umax, itself expandable via cmp.  */
            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            /* All of these have a fallback built on cmp_vec.  */
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}
  144. void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
  145. {
  146. TCGOp *op = tcg_emit_op(opc, 2);
  147. TCGOP_VECL(op) = type - TCG_TYPE_V64;
  148. TCGOP_VECE(op) = vece;
  149. op->args[0] = r;
  150. op->args[1] = a;
  151. }
  152. void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
  153. TCGArg r, TCGArg a, TCGArg b)
  154. {
  155. TCGOp *op = tcg_emit_op(opc, 3);
  156. TCGOP_VECL(op) = type - TCG_TYPE_V64;
  157. TCGOP_VECE(op) = vece;
  158. op->args[0] = r;
  159. op->args[1] = a;
  160. op->args[2] = b;
  161. }
  162. void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
  163. TCGArg r, TCGArg a, TCGArg b, TCGArg c)
  164. {
  165. TCGOp *op = tcg_emit_op(opc, 4);
  166. TCGOP_VECL(op) = type - TCG_TYPE_V64;
  167. TCGOP_VECE(op) = vece;
  168. op->args[0] = r;
  169. op->args[1] = a;
  170. op->args[2] = b;
  171. op->args[3] = c;
  172. }
  173. static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
  174. TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
  175. {
  176. TCGOp *op = tcg_emit_op(opc, 6);
  177. TCGOP_VECL(op) = type - TCG_TYPE_V64;
  178. TCGOP_VECE(op) = vece;
  179. op->args[0] = r;
  180. op->args[1] = a;
  181. op->args[2] = b;
  182. op->args[3] = c;
  183. op->args[4] = d;
  184. op->args[5] = e;
  185. }
  186. static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
  187. {
  188. TCGTemp *rt = tcgv_vec_temp(r);
  189. TCGTemp *at = tcgv_vec_temp(a);
  190. TCGType type = rt->base_type;
  191. /* Must enough inputs for the output. */
  192. tcg_debug_assert(at->base_type >= type);
  193. vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
  194. }
  195. static void vec_gen_op3(TCGOpcode opc, unsigned vece,
  196. TCGv_vec r, TCGv_vec a, TCGv_vec b)
  197. {
  198. TCGTemp *rt = tcgv_vec_temp(r);
  199. TCGTemp *at = tcgv_vec_temp(a);
  200. TCGTemp *bt = tcgv_vec_temp(b);
  201. TCGType type = rt->base_type;
  202. /* Must enough inputs for the output. */
  203. tcg_debug_assert(at->base_type >= type);
  204. tcg_debug_assert(bt->base_type >= type);
  205. vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
  206. }
  207. void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
  208. {
  209. if (r != a) {
  210. vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
  211. }
  212. }
  213. void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
  214. {
  215. TCGTemp *rt = tcgv_vec_temp(r);
  216. tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
  217. }
/*
 * Replicate the i64 value A into every VECE-sized element of R.
 * On 32-bit hosts a 64-bit source must be assembled from its two
 * 32-bit halves; for narrower elements the low half suffices.
 */
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        /* 32-bit host: build the 64-bit elements from both halves. */
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        /* Elements are at most 32 bits; the low half holds the value. */
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}
/* Replicate the i32 value A into every VECE-sized element of R. */
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

/*
 * Load one VECE-sized element from host address B + OFS and replicate
 * it into every element of R.
 */
void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

/* Common helper for whole-vector load/store at host address B + O. */
static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}
/* Load vector R from host address B + O. */
void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

/* Store vector R to host address B + O. */
void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

/*
 * Store only the low LOW_TYPE portion of vector R to host address
 * B + O.  LOW_TYPE must be a vector type no wider than R itself.
 */
void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}
/* r = a & b.  Mandatory opcode; VECE is irrelevant for bitwise ops. */
void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

/* r = a | b.  Mandatory opcode. */
void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

/* r = a ^ b.  Mandatory opcode. */
void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}
  290. void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
  291. {
  292. if (TCG_TARGET_HAS_andc_vec) {
  293. vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
  294. } else {
  295. TCGv_vec t = tcg_temp_new_vec_matching(r);
  296. tcg_gen_not_vec(0, t, b);
  297. tcg_gen_and_vec(0, r, a, t);
  298. tcg_temp_free_vec(t);
  299. }
  300. }
  301. void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
  302. {
  303. if (TCG_TARGET_HAS_orc_vec) {
  304. vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
  305. } else {
  306. TCGv_vec t = tcg_temp_new_vec_matching(r);
  307. tcg_gen_not_vec(0, t, b);
  308. tcg_gen_or_vec(0, r, a, t);
  309. tcg_temp_free_vec(t);
  310. }
  311. }
  312. void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
  313. {
  314. if (TCG_TARGET_HAS_nand_vec) {
  315. vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
  316. } else {
  317. tcg_gen_and_vec(0, r, a, b);
  318. tcg_gen_not_vec(0, r, r);
  319. }
  320. }
  321. void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
  322. {
  323. if (TCG_TARGET_HAS_nor_vec) {
  324. vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
  325. } else {
  326. tcg_gen_or_vec(0, r, a, b);
  327. tcg_gen_not_vec(0, r, r);
  328. }
  329. }
  330. void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
  331. {
  332. if (TCG_TARGET_HAS_eqv_vec) {
  333. vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
  334. } else {
  335. tcg_gen_xor_vec(0, r, a, b);
  336. tcg_gen_not_vec(0, r, r);
  337. }
  338. }
/*
 * Emit unary vector op OPC, either directly (tcg_can_emit_vec_op > 0)
 * or via the target's custom expansion (< 0).  Return false when the
 * host supports neither, so the caller must supply a generic fallback.
 */
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        /* Suspend vecop-list checking while the target expands the op. */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}
/* r = ~a, falling back to r = a ^ -1 when the host lacks a not op. */
void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1));
    }
    tcg_swap_vecop_list(hold_list);
}
/* r = -a, falling back to r = 0 - a when the host lacks a neg op. */
void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        tcg_gen_sub_vec(vece, r, tcg_constant_vec_matching(r, vece, 0), a);
    }
    tcg_swap_vecop_list(hold_list);
}
/*
 * r = |a| per element.  When the host lacks abs, expand via
 * smax(a, -a), or via a sign mask: (a ^ m) - m where m is all-ones
 * for negative elements and zero otherwise.
 */
void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        /* Guaranteed by the check in tcg_can_emit_vecop_list above. */
        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            /* abs(a) = smax(a, -a) */
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            /* Build the sign mask t via arithmetic shift or compare. */
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            /* (a ^ t) - t: identity when t == 0, negation when t == -1. */
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
/*
 * Emit an immediate-count shift/rotate OPC on each element of A.
 * The count I must lie in [0, element_bits); a zero count reduces
 * to a simple move.
 */
static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift easier than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}
/* Shift each element of A left by immediate I. */
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

/* Logical right shift of each element of A by immediate I. */
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

/* Arithmetic right shift of each element of A by immediate I. */
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

/* Rotate each element of A left by immediate I. */
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}
  450. void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
  451. {
  452. int bits = 8 << vece;
  453. tcg_debug_assert(i >= 0 && i < bits);
  454. do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
  455. }
/*
 * Per-element compare: r = (a COND b), producing a -1/0 mask per
 * element (the abs fallback above relies on this mask form).
 */
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        /* The target must be able to expand cmp_vec itself. */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}
/*
 * Emit binary vector op OPC, either directly (tcg_can_emit_vec_op > 0)
 * or via the target's custom expansion (< 0).  Return false when the
 * host supports neither, so the caller must supply a generic fallback.
 */
static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        /* Suspend vecop-list checking while the target expands the op. */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}
  506. static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
  507. TCGv_vec b, TCGOpcode opc)
  508. {
  509. bool ok = do_op3(vece, r, a, b, opc);
  510. tcg_debug_assert(ok);
  511. }
/* r = a + b per element. */
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

/* r = a - b per element. */
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

/* r = a * b per element. */
void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

/* Signed saturating add per element. */
void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}
/* Unsigned saturating add per element, with a umin-based fallback. */
void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* usadd(a, b) = min(a, ~b) + b */
        tcg_gen_not_vec(vece, t, b);
        tcg_gen_umin_vec(vece, t, t, a);
        tcg_gen_add_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}
/* Signed saturating subtract per element. */
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}
/* Unsigned saturating subtract per element, with a umax-based fallback. */
void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* ussub(a, b) = max(a, b) - b */
        tcg_gen_umax_vec(vece, t, a, b);
        tcg_gen_sub_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}
/*
 * Expand a min/max OPC, falling back to cmpsel with the matching
 * comparison: r = (a COND b ? a : b).
 */
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}
/* Signed minimum per element. */
void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

/* Unsigned minimum per element. */
void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

/* Signed maximum per element. */
void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

/* Unsigned maximum per element. */
void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}
/* Shift each element of A left by the matching element of B. */
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

/* Logical right shift of each element of A by the matching element of B. */
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

/* Arithmetic right shift of each element of A by the matching element of B. */
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

/* Rotate each element of A left by the matching element of B. */
void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

/* Rotate each element of A right by the matching element of B. */
void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}
/*
 * Emit a shift of every element of A by the scalar i32 count S.
 * Unlike the other expanders there is no generic fallback here:
 * the host must support the opcode directly or expand it itself.
 */
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}
/* Shift each element of A left by the scalar count B. */
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

/* Logical right shift of each element of A by the scalar count B. */
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

/* Arithmetic right shift of each element of A by the scalar count B. */
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

/* Rotate each element of A left by the scalar count S. */
void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}
/*
 * Bitwise select: r = (a & b) | (~a & c) -- each result bit comes
 * from B where the corresponding bit of A is set, else from C.
 * Falls back to and/andc/or when the host lacks bitsel.
 */
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        /* Use a temp for a & b so that R may alias any input. */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}
/*
 * Per-element conditional select: r = (a COND b) ? c : d.
 * Uses the host cmpsel if available, else the target's expansion,
 * else a cmp to produce a mask followed by bitsel.
 */
void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        /* Generic fallback: compute the mask, then select with it. */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}