|
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
|
|
|
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
|
|
|
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
|
|
|
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
|
|
|
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
|
|
|
|
|
|
/* Note that the gvec expanders operate on offsets + sizes. */
|
|
|
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
|
|
@@ -2113,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
|
|
|
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
|
|
|
}
|
|
|
|
|
|
+static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
|
|
|
+ int rn, int size)
|
|
|
+{
|
|
|
+ TCGv_i64 tcg_rs = cpu_reg(s, rs);
|
|
|
+ TCGv_i64 tcg_rt = cpu_reg(s, rt);
|
|
|
+ int memidx = get_mem_index(s);
|
|
|
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
|
|
|
+
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+ tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
|
|
|
+ size | MO_ALIGN | s->be_data);
|
|
|
+}
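The cmpxchg above already captures the full architectural behaviour of CAS: Rs supplies the comparison value and then receives the old memory contents, whether or not the store happens. A minimal host-side model of that behaviour, assuming C11 atomics (the function name is illustrative, not QEMU code):

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative model of a 64-bit CAS: returns the value to write back
 * to Rs; memory is updated only when it matched rs. */
static uint64_t model_cas64(_Atomic uint64_t *mem, uint64_t rs, uint64_t rt)
{
    uint64_t expected = rs;
    /* On failure, 'expected' is overwritten with the current memory value,
     * so it always ends up holding the old data, exactly as Rs must. */
    atomic_compare_exchange_strong(mem, &expected, rt);
    return expected;
}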
|
|
|
+
|
|
|
+static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
|
|
|
+ int rn, int size)
|
|
|
+{
|
|
|
+ TCGv_i64 s1 = cpu_reg(s, rs);
|
|
|
+ TCGv_i64 s2 = cpu_reg(s, rs + 1);
|
|
|
+ TCGv_i64 t1 = cpu_reg(s, rt);
|
|
|
+ TCGv_i64 t2 = cpu_reg(s, rt + 1);
|
|
|
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
|
|
|
+ int memidx = get_mem_index(s);
|
|
|
+
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (size == 2) {
|
|
|
+ TCGv_i64 cmp = tcg_temp_new_i64();
|
|
|
+ TCGv_i64 val = tcg_temp_new_i64();
|
|
|
+
|
|
|
+ if (s->be_data == MO_LE) {
|
|
|
+ tcg_gen_concat32_i64(val, t1, t2);
|
|
|
+ tcg_gen_concat32_i64(cmp, s1, s2);
|
|
|
+ } else {
|
|
|
+ tcg_gen_concat32_i64(val, t2, t1);
|
|
|
+ tcg_gen_concat32_i64(cmp, s2, s1);
|
|
|
+ }
|
|
|
+
|
|
|
+ tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
|
|
|
+ MO_64 | MO_ALIGN | s->be_data);
|
|
|
+ tcg_temp_free_i64(val);
|
|
|
+
|
|
|
+ if (s->be_data == MO_LE) {
|
|
|
+ tcg_gen_extr32_i64(s1, s2, cmp);
|
|
|
+ } else {
|
|
|
+ tcg_gen_extr32_i64(s2, s1, cmp);
|
|
|
+ }
|
|
|
+ tcg_temp_free_i64(cmp);
|
|
|
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
|
|
|
+ TCGv_i32 tcg_rs = tcg_const_i32(rs);
|
|
|
+
|
|
|
+ if (s->be_data == MO_LE) {
|
|
|
+ gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
|
|
|
+ } else {
|
|
|
+ gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
|
|
|
+ }
|
|
|
+ tcg_temp_free_i32(tcg_rs);
|
|
|
+ } else {
|
|
|
+ TCGv_i64 d1 = tcg_temp_new_i64();
|
|
|
+ TCGv_i64 d2 = tcg_temp_new_i64();
|
|
|
+ TCGv_i64 a2 = tcg_temp_new_i64();
|
|
|
+ TCGv_i64 c1 = tcg_temp_new_i64();
|
|
|
+ TCGv_i64 c2 = tcg_temp_new_i64();
|
|
|
+ TCGv_i64 zero = tcg_const_i64(0);
|
|
|
+
|
|
|
+ /* Load the two words, in memory order. */
|
|
|
+ tcg_gen_qemu_ld_i64(d1, addr, memidx,
|
|
|
+ MO_64 | MO_ALIGN_16 | s->be_data);
|
|
|
+ tcg_gen_addi_i64(a2, addr, 8);
|
|
|
+ tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
|
|
|
+
|
|
|
+ /* Compare the two words, also in memory order. */
|
|
|
+ tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
|
|
|
+ tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
|
|
|
+ tcg_gen_and_i64(c2, c2, c1);
|
|
|
+
|
|
|
+ /* If compare equal, write back new data, else write back old data. */
|
|
|
+ tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
|
|
|
+ tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
|
|
|
+ tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
|
|
|
+ tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
|
|
|
+ tcg_temp_free_i64(a2);
|
|
|
+ tcg_temp_free_i64(c1);
|
|
|
+ tcg_temp_free_i64(c2);
|
|
|
+ tcg_temp_free_i64(zero);
|
|
|
+
|
|
|
+ /* Write back the data from memory to Rs. */
|
|
|
+ tcg_gen_mov_i64(s1, d1);
|
|
|
+ tcg_gen_mov_i64(s2, d2);
|
|
|
+ tcg_temp_free_i64(d1);
|
|
|
+ tcg_temp_free_i64(d2);
|
|
|
+ }
|
|
|
+}
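The serial fallback above implements CASP in four steps: load both words in memory order, compare both against Rs/Rs+1, write back either the new or the old data, and unconditionally return the loaded data in Rs/Rs+1. A plain-C sketch of those semantics, assuming no concurrent writer (which is exactly why the CF_PARALLEL case is routed to the casp helpers instead); names are illustrative, not QEMU code:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative model of CASP on a 2x64-bit pair. */
static void model_casp(uint64_t mem[2], uint64_t rs[2], const uint64_t rt[2])
{
    uint64_t d1 = mem[0], d2 = mem[1];          /* load, in memory order */
    bool eq = (d1 == rs[0]) && (d2 == rs[1]);   /* compare both words */

    mem[0] = eq ? rt[0] : d1;                   /* store new or old data */
    mem[1] = eq ? rt[1] : d2;

    rs[0] = d1;                                 /* Rs always receives old data */
    rs[1] = d2;
}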
|
|
|
+
|
|
|
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
|
|
|
* from the ARMv8 specs for LDR (Shared decode for all encodings).
|
|
|
*/
|
|
@@ -2147,62 +2245,114 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
|
|
|
int rt = extract32(insn, 0, 5);
|
|
|
int rn = extract32(insn, 5, 5);
|
|
|
int rt2 = extract32(insn, 10, 5);
|
|
|
- int is_lasr = extract32(insn, 15, 1);
|
|
|
int rs = extract32(insn, 16, 5);
|
|
|
- int is_pair = extract32(insn, 21, 1);
|
|
|
- int is_store = !extract32(insn, 22, 1);
|
|
|
- int is_excl = !extract32(insn, 23, 1);
|
|
|
+ int is_lasr = extract32(insn, 15, 1);
|
|
|
+ int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
|
|
|
int size = extract32(insn, 30, 2);
|
|
|
TCGv_i64 tcg_addr;
|
|
|
|
|
|
- if ((!is_excl && !is_pair && !is_lasr) ||
|
|
|
- (!is_excl && is_pair) ||
|
|
|
- (is_pair && size < 2)) {
|
|
|
- unallocated_encoding(s);
|
|
|
+ switch (o2_L_o1_o0) {
|
|
|
+ case 0x0: /* STXR */
|
|
|
+ case 0x1: /* STLXR */
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+ if (is_lasr) {
|
|
|
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
|
|
|
+ }
|
|
|
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
|
|
|
return;
|
|
|
- }
|
|
|
|
|
|
- if (rn == 31) {
|
|
|
- gen_check_sp_alignment(s);
|
|
|
- }
|
|
|
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ case 0x4: /* LDXR */
|
|
|
+ case 0x5: /* LDAXR */
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ s->is_ldex = true;
|
|
|
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
|
|
|
+ if (is_lasr) {
|
|
|
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
|
|
|
+ }
|
|
|
+ return;
|
|
|
|
|
|
- /* Note that since TCG is single threaded load-acquire/store-release
|
|
|
- * semantics require no extra if (is_lasr) { ... } handling.
|
|
|
- */
|
|
|
+ case 0x9: /* STLR */
|
|
|
+ /* Generate ISS for non-exclusive accesses including LASR. */
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
|
|
|
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
|
|
|
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
|
|
|
+ return;
|
|
|
|
|
|
- if (is_excl) {
|
|
|
- if (!is_store) {
|
|
|
- s->is_ldex = true;
|
|
|
- gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
|
|
|
- if (is_lasr) {
|
|
|
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
|
|
|
+ case 0xd: /* LDAR */
|
|
|
+ /* Generate ISS for non-exclusive accesses including LASR. */
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
|
|
|
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
|
|
|
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
|
|
|
+ return;
|
|
|
+
|
|
|
+ case 0x2: case 0x3: /* CASP / STXP */
|
|
|
+ if (size & 2) { /* STXP / STLXP */
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
}
|
|
|
- } else {
|
|
|
if (is_lasr) {
|
|
|
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
|
|
|
}
|
|
|
- gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
|
|
|
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
|
|
|
+ return;
|
|
|
}
|
|
|
- } else {
|
|
|
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
|
|
|
- bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
|
|
|
+ if (rt2 == 31
|
|
|
+ && ((rt | rs) & 1) == 0
|
|
|
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
|
|
|
+ /* CASP / CASPL */
|
|
|
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ break;
|
|
|
|
|
|
- /* Generate ISS for non-exclusive accesses including LASR. */
|
|
|
- if (is_store) {
|
|
|
- if (is_lasr) {
|
|
|
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
|
|
|
+ case 0x6: case 0x7: /* CASPA / LDXP */
|
|
|
+ if (size & 2) { /* LDXP / LDAXP */
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
}
|
|
|
- do_gpr_st(s, tcg_rt, tcg_addr, size,
|
|
|
- true, rt, iss_sf, is_lasr);
|
|
|
- } else {
|
|
|
- do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
|
|
|
- true, rt, iss_sf, is_lasr);
|
|
|
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
|
|
|
+ s->is_ldex = true;
|
|
|
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
|
|
|
if (is_lasr) {
|
|
|
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
|
|
|
}
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ if (rt2 == 31
|
|
|
+ && ((rt | rs) & 1) == 0
|
|
|
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
|
|
|
+ /* CASPA / CASPAL */
|
|
|
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
|
|
|
+ return;
|
|
|
}
|
|
|
+ break;
|
|
|
+
|
|
|
+ case 0xa: /* CAS */
|
|
|
+ case 0xb: /* CASL */
|
|
|
+ case 0xe: /* CASA */
|
|
|
+ case 0xf: /* CASAL */
|
|
|
+ if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
|
|
|
+ gen_compare_and_swap(s, rs, rt, rn, size);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ break;
|
|
|
}
|
|
|
+ unallocated_encoding(s);
|
|
|
}
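The switch above keys on a 4-bit value built from o2:L:o1 (insn[23:21]) and o0 (insn[15], which doubles as is_lasr). A standalone sketch of that packing and a few of the resulting case labels (illustrative only, not QEMU code):

#include <stdint.h>

/* Pack o2:L:o1:o0 into the selector used by the decode switch.
 * Examples: 0x1 = STLXR, 0x5 = LDAXR, 0x9 = STLR, 0xd = LDAR,
 * 0xa/0xb/0xe/0xf = CAS/CASL/CASA/CASAL. */
static unsigned o2_L_o1_o0(uint32_t insn)
{
    unsigned o2_L_o1 = (insn >> 21) & 7;   /* extract32(insn, 21, 3) */
    unsigned o0      = (insn >> 15) & 1;   /* is_lasr */
    return o2_L_o1 * 2 | o0;
}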
|
|
|
|
|
|
/*
|
|
@@ -2715,6 +2865,88 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/* Atomic memory operations
|
|
|
+ *
|
|
|
+ *  31  30      27  26    24    22  21   16   15    12    10    5     0
|
|
|
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
|
|
|
+ * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
|
|
|
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
|
|
|
+ *
|
|
|
+ * Rt: the result register
|
|
|
+ * Rn: base address or SP
|
|
|
+ * Rs: the source register for the operation
|
|
|
+ * V: vector flag (always 0 as of v8.3)
|
|
|
+ * A: acquire flag
|
|
|
+ * R: release flag
|
|
|
+ */
|
|
|
+static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
|
|
|
+ int size, int rt, bool is_vector)
|
|
|
+{
|
|
|
+ int rs = extract32(insn, 16, 5);
|
|
|
+ int rn = extract32(insn, 5, 5);
|
|
|
+ int o3_opc = extract32(insn, 12, 4);
|
|
|
+ int feature = ARM_FEATURE_V8_ATOMICS;
|
|
|
+ TCGv_i64 tcg_rn, tcg_rs;
|
|
|
+ AtomicThreeOpFn *fn;
|
|
|
+
|
|
|
+ if (is_vector) {
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ switch (o3_opc) {
|
|
|
+ case 000: /* LDADD */
|
|
|
+ fn = tcg_gen_atomic_fetch_add_i64;
|
|
|
+ break;
|
|
|
+ case 001: /* LDCLR */
|
|
|
+ fn = tcg_gen_atomic_fetch_and_i64;
|
|
|
+ break;
|
|
|
+ case 002: /* LDEOR */
|
|
|
+ fn = tcg_gen_atomic_fetch_xor_i64;
|
|
|
+ break;
|
|
|
+ case 003: /* LDSET */
|
|
|
+ fn = tcg_gen_atomic_fetch_or_i64;
|
|
|
+ break;
|
|
|
+ case 004: /* LDSMAX */
|
|
|
+ fn = tcg_gen_atomic_fetch_smax_i64;
|
|
|
+ break;
|
|
|
+ case 005: /* LDSMIN */
|
|
|
+ fn = tcg_gen_atomic_fetch_smin_i64;
|
|
|
+ break;
|
|
|
+ case 006: /* LDUMAX */
|
|
|
+ fn = tcg_gen_atomic_fetch_umax_i64;
|
|
|
+ break;
|
|
|
+ case 007: /* LDUMIN */
|
|
|
+ fn = tcg_gen_atomic_fetch_umin_i64;
|
|
|
+ break;
|
|
|
+ case 010: /* SWP */
|
|
|
+ fn = tcg_gen_atomic_xchg_i64;
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ if (!arm_dc_feature(s, feature)) {
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (rn == 31) {
|
|
|
+ gen_check_sp_alignment(s);
|
|
|
+ }
|
|
|
+ tcg_rn = cpu_reg_sp(s, rn);
|
|
|
+ tcg_rs = read_cpu_reg(s, rs, true);
|
|
|
+
|
|
|
+ if (o3_opc == 1) { /* LDCLR */
|
|
|
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* The tcg atomic primitives are all full barriers. Therefore we
|
|
|
+ * can ignore the Acquire and Release bits of this instruction.
|
|
|
+ */
|
|
|
+ fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
|
|
|
+ s->be_data | size | MO_ALIGN);
|
|
|
+}
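Note the LDCLR mapping above: architecturally the instruction stores old & ~Rs while returning the old value in Rt, but TCG only provides a fetch-and primitive, hence the tcg_gen_not_i64 on Rs beforehand. A C11 model of the resulting operation (illustrative only, not QEMU code):

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative model of LDCLR: Rt gets the old value, memory gets
 * old & ~Rs, expressed as a fetch-and with a pre-inverted operand. */
static uint64_t model_ldclr64(_Atomic uint64_t *mem, uint64_t rs)
{
    return atomic_fetch_and(mem, ~rs);
}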
|
|
|
+
|
|
|
/* Load/store register (all forms) */
|
|
|
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
|
|
|
{
|
|
@@ -2725,23 +2957,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
|
|
|
|
|
|
switch (extract32(insn, 24, 2)) {
|
|
|
case 0:
|
|
|
- if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
|
|
|
- disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
|
|
|
- } else {
|
|
|
+ if (extract32(insn, 21, 1) == 0) {
|
|
|
/* Load/store register (unscaled immediate)
|
|
|
* Load/store immediate pre/post-indexed
|
|
|
* Load/store register unprivileged
|
|
|
*/
|
|
|
disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ switch (extract32(insn, 10, 2)) {
|
|
|
+ case 0:
|
|
|
+ disas_ldst_atomic(s, insn, size, rt, is_vector);
|
|
|
+ return;
|
|
|
+ case 2:
|
|
|
+ disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
|
|
|
+ return;
|
|
|
}
|
|
|
break;
|
|
|
case 1:
|
|
|
disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
|
|
|
- break;
|
|
|
- default:
|
|
|
- unallocated_encoding(s);
|
|
|
- break;
|
|
|
+ return;
|
|
|
}
|
|
|
+ unallocated_encoding(s);
|
|
|
}
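With bit 21 set, bits [11:10] now distinguish the atomic memory operations (0b00) from the existing register-offset forms (0b10); the remaining values fall through to unallocated_encoding(). A compact sketch of this dispatch for the extract32(insn, 24, 2) == 0 group (enum and function names are illustrative, not QEMU code):

#include <stdint.h>

enum ldst_reg_form { LDST_IMM9, LDST_ATOMIC, LDST_ROFFSET, LDST_UNALLOC };

/* Classify a load/store-register encoding the same way the rewritten
 * switch above does, as of this patch. */
static enum ldst_reg_form classify_ldst_reg(uint32_t insn)
{
    if (((insn >> 21) & 1) == 0) {
        return LDST_IMM9;                 /* unscaled imm / pre-post / unpriv */
    }
    switch ((insn >> 10) & 3) {
    case 0:  return LDST_ATOMIC;          /* disas_ldst_atomic */
    case 2:  return LDST_ROFFSET;         /* disas_ldst_reg_roffset */
    default: return LDST_UNALLOC;
    }
}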
|
|
|
|
|
|
/* AdvSIMD load/store multiple structures
|
|
@@ -5444,31 +5681,24 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
|
|
|
|
|
|
if (itof) {
|
|
|
TCGv_i64 tcg_rn = cpu_reg(s, rn);
|
|
|
+ TCGv_i64 tmp;
|
|
|
|
|
|
switch (type) {
|
|
|
case 0:
|
|
|
- {
|
|
|
/* 32 bit */
|
|
|
- TCGv_i64 tmp = tcg_temp_new_i64();
|
|
|
+ tmp = tcg_temp_new_i64();
|
|
|
tcg_gen_ext32u_i64(tmp, tcg_rn);
|
|
|
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
|
|
|
- tcg_gen_movi_i64(tmp, 0);
|
|
|
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
|
|
|
+ write_fp_dreg(s, rd, tmp);
|
|
|
tcg_temp_free_i64(tmp);
|
|
|
break;
|
|
|
- }
|
|
|
case 1:
|
|
|
- {
|
|
|
/* 64 bit */
|
|
|
- TCGv_i64 tmp = tcg_const_i64(0);
|
|
|
- tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
|
|
|
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
|
|
|
- tcg_temp_free_i64(tmp);
|
|
|
+ write_fp_dreg(s, rd, tcg_rn);
|
|
|
break;
|
|
|
- }
|
|
|
case 2:
|
|
|
/* 64 bit to top half. */
|
|
|
tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
|
|
|
+ clear_vec_high(s, true, rd);
|
|
|
break;
|
|
|
}
|
|
|
} else {
|
|
@@ -6021,15 +6251,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
|
|
|
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
|
|
|
break;
|
|
|
case 0x0a: /* SMAXV / UMAXV */
|
|
|
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
|
|
|
- tcg_res,
|
|
|
- tcg_res, tcg_elt, tcg_res, tcg_elt);
|
|
|
+ if (is_u) {
|
|
|
+ tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
|
|
|
+ } else {
|
|
|
+ tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
|
|
|
+ }
|
|
|
break;
|
|
|
case 0x1a: /* SMINV / UMINV */
|
|
|
- tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
|
|
|
- tcg_res,
|
|
|
- tcg_res, tcg_elt, tcg_res, tcg_elt);
|
|
|
- break;
|
|
|
+ if (is_u) {
|
|
|
+ tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
|
|
|
+ } else {
|
|
|
+ tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
|
|
|
+ }
|
|
|
break;
|
|
|
default:
|
|
|
g_assert_not_reached();
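The surrounding loop (not visible in this hunk) feeds one element per iteration into tcg_elt and accumulates into tcg_res, so the new smax/umax/smin/umin ops express the across-lanes reduction directly. A scalar model of the SMAXV case (illustrative only, not QEMU code):

#include <stdint.h>

/* Illustrative SMAXV reduction over n signed 64-bit elements. */
static int64_t model_smaxv(const int64_t *elt, int n)
{
    int64_t res = elt[0];
    for (int i = 1; i < n; i++) {
        res = res >= elt[i] ? res : elt[i];   /* tcg_gen_smax_i64 per step */
    }
    return res;
}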
|
|
@@ -7165,13 +7398,26 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
|
|
|
int immh, int immb, int opcode,
|
|
|
int rn, int rd)
|
|
|
{
|
|
|
- bool is_double = extract32(immh, 3, 1);
|
|
|
- int size = is_double ? MO_64 : MO_32;
|
|
|
- int elements;
|
|
|
+ int size, elements, fracbits;
|
|
|
int immhb = immh << 3 | immb;
|
|
|
- int fracbits = (is_double ? 128 : 64) - immhb;
|
|
|
|
|
|
- if (!extract32(immh, 2, 2)) {
|
|
|
+ if (immh & 8) {
|
|
|
+ size = MO_64;
|
|
|
+ if (!is_scalar && !is_q) {
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ } else if (immh & 4) {
|
|
|
+ size = MO_32;
|
|
|
+ } else if (immh & 2) {
|
|
|
+ size = MO_16;
|
|
|
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ /* immh == 0 would be a failure of the decode logic */
|
|
|
+ g_assert(immh == 1);
|
|
|
unallocated_encoding(s);
|
|
|
return;
|
|
|
}
|
|
@@ -7179,20 +7425,14 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
|
|
|
if (is_scalar) {
|
|
|
elements = 1;
|
|
|
} else {
|
|
|
- elements = is_double ? 2 : is_q ? 4 : 2;
|
|
|
- if (is_double && !is_q) {
|
|
|
- unallocated_encoding(s);
|
|
|
- return;
|
|
|
- }
|
|
|
+ elements = (8 << is_q) >> size;
|
|
|
}
|
|
|
+ fracbits = (16 << size) - immhb;
|
|
|
|
|
|
if (!fp_access_check(s)) {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- /* immh == 0 would be a failure of the decode logic */
|
|
|
- g_assert(immh);
|
|
|
-
|
|
|
handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
|
|
|
}
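The rewritten decode derives the element size from the highest set bit of immh, then computes the element count and the fixed-point fraction width from it; for example immh=0b0010, immb=0b101 gives immhb=21 and, for fp16, fracbits = 32 - 21 = 11, with 8 elements when Q=1. A standalone sketch of those formulas (illustrative only, not QEMU code; it assumes immh > 1, since immh == 1 is rejected as unallocated above):

/* MO_16/MO_32/MO_64 are 1/2/3 in this sketch. */
static void shift_intfp_params(int immh, int immb, int is_q,
                               int *size, int *elements, int *fracbits)
{
    int immhb = immh << 3 | immb;

    *size     = (immh & 8) ? 3 : (immh & 4) ? 2 : 1;
    *elements = (8 << is_q) >> *size;     /* e.g. fp16, Q=1 -> 8 */
    *fracbits = (16 << *size) - immhb;    /* e.g. immhb=21, fp16 -> 11 */
}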
|
|
|
|
|
@@ -7201,19 +7441,28 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
|
|
|
bool is_q, bool is_u,
|
|
|
int immh, int immb, int rn, int rd)
|
|
|
{
|
|
|
- bool is_double = extract32(immh, 3, 1);
|
|
|
int immhb = immh << 3 | immb;
|
|
|
- int fracbits = (is_double ? 128 : 64) - immhb;
|
|
|
- int pass;
|
|
|
+ int pass, size, fracbits;
|
|
|
TCGv_ptr tcg_fpstatus;
|
|
|
TCGv_i32 tcg_rmode, tcg_shift;
|
|
|
|
|
|
- if (!extract32(immh, 2, 2)) {
|
|
|
- unallocated_encoding(s);
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- if (!is_scalar && !is_q && is_double) {
|
|
|
+ if (immh & 0x8) {
|
|
|
+ size = MO_64;
|
|
|
+ if (!is_scalar && !is_q) {
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ } else if (immh & 0x4) {
|
|
|
+ size = MO_32;
|
|
|
+ } else if (immh & 0x2) {
|
|
|
+ size = MO_16;
|
|
|
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
|
|
|
+ unallocated_encoding(s);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ /* Should have split out AdvSIMD modified immediate earlier. */
|
|
|
+ assert(immh == 1);
|
|
|
unallocated_encoding(s);
|
|
|
return;
|
|
|
}
|
|
@@ -7225,11 +7474,12 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
|
|
|
assert(!(is_scalar && is_q));
|
|
|
|
|
|
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
|
|
|
- tcg_fpstatus = get_fpstatus_ptr(false);
|
|
|
+ tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
|
|
|
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
|
|
|
+ fracbits = (16 << size) - immhb;
|
|
|
tcg_shift = tcg_const_i32(fracbits);
|
|
|
|
|
|
- if (is_double) {
|
|
|
+ if (size == MO_64) {
|
|
|
int maxpass = is_scalar ? 1 : 2;
|
|
|
|
|
|
for (pass = 0; pass < maxpass; pass++) {
|
|
@@ -7246,20 +7496,37 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
|
|
|
}
|
|
|
clear_vec_high(s, is_q, rd);
|
|
|
} else {
|
|
|
- int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
|
|
|
- for (pass = 0; pass < maxpass; pass++) {
|
|
|
- TCGv_i32 tcg_op = tcg_temp_new_i32();
|
|
|
+ void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
|
|
|
+ int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
|
|
|
|
|
|
- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
|
|
|
+ switch (size) {
|
|
|
+ case MO_16:
|
|
|
if (is_u) {
|
|
|
- gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
|
|
|
+ fn = gen_helper_vfp_touhh;
|
|
|
} else {
|
|
|
- gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
|
|
|
+ fn = gen_helper_vfp_toshh;
|
|
|
}
|
|
|
+ break;
|
|
|
+ case MO_32:
|
|
|
+ if (is_u) {
|
|
|
+ fn = gen_helper_vfp_touls;
|
|
|
+ } else {
|
|
|
+ fn = gen_helper_vfp_tosls;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ g_assert_not_reached();
|
|
|
+ }
|
|
|
+
|
|
|
+ for (pass = 0; pass < maxpass; pass++) {
|
|
|
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
|
|
|
+
|
|
|
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
|
|
|
+ fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
|
|
|
if (is_scalar) {
|
|
|
write_fp_sreg(s, rd, tcg_op);
|
|
|
} else {
|
|
|
- write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
|
|
|
+ write_vec_element_i32(s, tcg_op, rd, pass, size);
|
|
|
}
|
|
|
tcg_temp_free_i32(tcg_op);
|
|
|
}
|
|
@@ -9927,27 +10194,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/* Helper functions for 32 bit comparisons */
|
|
|
-static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
|
|
|
-{
|
|
|
- tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
|
|
|
-}
|
|
|
-
|
|
|
-static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
|
|
|
-{
|
|
|
- tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
|
|
|
-}
|
|
|
-
|
|
|
-static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
|
|
|
-{
|
|
|
- tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
|
|
|
-}
|
|
|
-
|
|
|
-static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
|
|
|
-{
|
|
|
- tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
|
|
|
-}
|
|
|
-
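The removed helpers only wrapped a movcond; the pairwise and three-same integer tables further down now point straight at the dedicated TCG min/max ops, which compute the same result. A plain-C statement of what either form produces per 32-bit element (illustrative only):

#include <stdint.h>

/* What gen_max_s32/gen_min_u32 (movcond) and tcg_gen_smax_i32/
 * tcg_gen_umin_i32 both compute for one element. */
static int32_t  smax32(int32_t a, int32_t b)   { return a >= b ? a : b; }
static int32_t  smin32(int32_t a, int32_t b)   { return a <= b ? a : b; }
static uint32_t umax32(uint32_t a, uint32_t b) { return a >= b ? a : b; }
static uint32_t umin32(uint32_t a, uint32_t b) { return a <= b ? a : b; }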
|
|
|
/* Pairwise op subgroup of C3.6.16.
|
|
|
*
|
|
|
* This is called directly or via the handle_3same_float for float pairwise
|
|
@@ -10047,7 +10293,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
|
|
|
static NeonGenTwoOpFn * const fns[3][2] = {
|
|
|
{ gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
|
|
|
{ gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
|
|
|
- { gen_max_s32, gen_max_u32 },
|
|
|
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
|
|
|
};
|
|
|
genfn = fns[size][u];
|
|
|
break;
|
|
@@ -10057,7 +10303,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
|
|
|
static NeonGenTwoOpFn * const fns[3][2] = {
|
|
|
{ gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
|
|
|
{ gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
|
|
|
- { gen_min_s32, gen_min_u32 },
|
|
|
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
|
|
|
};
|
|
|
genfn = fns[size][u];
|
|
|
break;
|
|
@@ -10512,7 +10758,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
|
|
|
static NeonGenTwoOpFn * const fns[3][2] = {
|
|
|
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
|
|
|
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
|
|
|
- { gen_max_s32, gen_max_u32 },
|
|
|
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
|
|
|
};
|
|
|
genfn = fns[size][u];
|
|
|
break;
|
|
@@ -10523,7 +10769,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
|
|
|
static NeonGenTwoOpFn * const fns[3][2] = {
|
|
|
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
|
|
|
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
|
|
|
- { gen_min_s32, gen_min_u32 },
|
|
|
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
|
|
|
};
|
|
|
genfn = fns[size][u];
|
|
|
break;
|