|
@@ -1658,6 +1658,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
|
|
TCGArg dest, TCGArg arg1, TCGArg arg2,
|
|
|
int const_arg2, bool neg)
|
|
|
{
|
|
|
+ int cmp_rexw = rexw;
|
|
|
bool inv = false;
|
|
|
bool cleared;
|
|
|
int jcc;
|
|
@@ -1674,6 +1675,18 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
|
|
}
|
|
|
break;
|
|
|
|
|
|
+ case TCG_COND_TSTNE:
|
|
|
+ inv = true;
|
|
|
+ /* fall through */
|
|
|
+ case TCG_COND_TSTEQ:
|
|
|
+ /* If arg2 is -1, convert to LTU/GEU vs 1. */
|
|
|
+ if (const_arg2 && arg2 == 0xffffffffu) {
|
|
|
+ arg2 = 1;
|
|
|
+ cmp_rexw = 0;
|
|
|
+ goto do_ltu;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+
|
|
|
case TCG_COND_LEU:
|
|
|
inv = true;
|
|
|
/* fall through */
|
|
@@ -1697,7 +1710,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
|
|
* We can then use NEG or INC to produce the desired result.
|
|
|
* This is always smaller than the SETCC expansion.
|
|
|
*/
|
|
|
- tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw);
|
|
|
+ tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw);
|
|
|
|
|
|
/* X - X - C = -C = (C ? -1 : 0) */
|
|
|
tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest);
|
|
@@ -1744,7 +1757,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
|
|
|
cleared = true;
|
|
|
}
|
|
|
|
|
|
- jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
|
|
|
+ jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw);
|
|
|
tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);
|
|
|
|
|
|
if (!cleared) {
|
|
@@ -3769,49 +3782,20 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
|
|
|
+static void expand_vec_shi(TCGType type, unsigned vece, bool right,
|
|
|
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
|
|
|
{
|
|
|
- TCGv_vec t1, t2;
|
|
|
+ uint8_t mask;
|
|
|
|
|
|
tcg_debug_assert(vece == MO_8);
|
|
|
-
|
|
|
- t1 = tcg_temp_new_vec(type);
|
|
|
- t2 = tcg_temp_new_vec(type);
|
|
|
-
|
|
|
- /*
|
|
|
- * Unpack to W, shift, and repack. Tricky bits:
|
|
|
- * (1) Use punpck*bw x,x to produce DDCCBBAA,
|
|
|
- * i.e. duplicate in other half of the 16-bit lane.
|
|
|
- * (2) For right-shift, add 8 so that the high half of the lane
|
|
|
- * becomes zero. For left-shift, and left-rotate, we must
|
|
|
- * shift up and down again.
|
|
|
- * (3) Step 2 leaves high half zero such that PACKUSWB
|
|
|
- * (pack with unsigned saturation) does not modify
|
|
|
- * the quantity.
|
|
|
- */
|
|
|
- vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
|
|
|
- tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
|
|
|
- vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
|
|
|
- tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
|
|
|
-
|
|
|
- if (opc != INDEX_op_rotli_vec) {
|
|
|
- imm += 8;
|
|
|
- }
|
|
|
- if (opc == INDEX_op_shri_vec) {
|
|
|
- tcg_gen_shri_vec(MO_16, t1, t1, imm);
|
|
|
- tcg_gen_shri_vec(MO_16, t2, t2, imm);
|
|
|
+ if (right) {
|
|
|
+ mask = 0xff >> imm;
|
|
|
+ tcg_gen_shri_vec(MO_16, v0, v1, imm);
|
|
|
} else {
|
|
|
- tcg_gen_shli_vec(MO_16, t1, t1, imm);
|
|
|
- tcg_gen_shli_vec(MO_16, t2, t2, imm);
|
|
|
- tcg_gen_shri_vec(MO_16, t1, t1, 8);
|
|
|
- tcg_gen_shri_vec(MO_16, t2, t2, 8);
|
|
|
+ mask = 0xff << imm;
|
|
|
+ tcg_gen_shli_vec(MO_16, v0, v1, imm);
|
|
|
}
|
|
|
-
|
|
|
- vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
|
|
|
- tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
|
|
|
- tcg_temp_free_vec(t1);
|
|
|
- tcg_temp_free_vec(t2);
|
|
|
+ tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask));
|
|
|
}
|
|
|
|
|
|
static void expand_vec_sari(TCGType type, unsigned vece,
|
|
@@ -3821,7 +3805,7 @@ static void expand_vec_sari(TCGType type, unsigned vece,
|
|
|
|
|
|
switch (vece) {
|
|
|
case MO_8:
|
|
|
- /* Unpack to W, shift, and repack, as in expand_vec_shi. */
|
|
|
+ /* Unpack to 16-bit, shift, and repack. */
|
|
|
t1 = tcg_temp_new_vec(type);
|
|
|
t2 = tcg_temp_new_vec(type);
|
|
|
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
|
|
@@ -3874,12 +3858,7 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
|
|
|
{
|
|
|
TCGv_vec t;
|
|
|
|
|
|
- if (vece == MO_8) {
|
|
|
- expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- if (have_avx512vbmi2) {
|
|
|
+ if (vece != MO_8 && have_avx512vbmi2) {
|
|
|
vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
|
|
|
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
|
|
|
return;
|
|
@@ -4155,10 +4134,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
|
|
|
|
|
|
switch (opc) {
|
|
|
case INDEX_op_shli_vec:
|
|
|
+ expand_vec_shi(type, vece, false, v0, v1, a2);
|
|
|
+ break;
|
|
|
case INDEX_op_shri_vec:
|
|
|
- expand_vec_shi(type, vece, opc, v0, v1, a2);
|
|
|
+ expand_vec_shi(type, vece, true, v0, v1, a2);
|
|
|
break;
|
|
|
-
|
|
|
case INDEX_op_sari_vec:
|
|
|
expand_vec_sari(type, vece, v0, v1, a2);
|
|
|
break;
|