
tcg/tcti: update code gen for latest TCG changes

osy, 10 months ago
Parent commit
b65c350e24
6 changed files with 190 additions and 163 deletions
  1. + 16  - 0    tcg/aarch64-tcti/tcg-target-reg-bits.h
  2. + 150 - 95   tcg/aarch64-tcti/tcg-target.c.inc
  3. + 6   - 12   tcg/aarch64-tcti/tcg-target.h
  4. + 17  - 43   tcg/aarch64-tcti/tcti-gadget-gen.py
  5. + 0   - 12   tcg/meson.build
  6. + 1   - 1    tcg/tcg.c

+ 16 - 0
tcg/aarch64-tcti/tcg-target-reg-bits.h

@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define target-specific register size
+ * Copyright (c) 2009, 2011 Stefan Weil
+ */
+
+#ifndef TCG_TARGET_REG_BITS_H
+#define TCG_TARGET_REG_BITS_H
+
+#if UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tci target
+#endif
+
+#endif

+ 150 - 95
tcg/aarch64-tcti/tcg-target.c.inc

@@ -38,7 +38,7 @@
     do { \
         fprintf(stderr, "TODO %s:%u: %s()\n", \
                 __FILE__, __LINE__, __func__); \
-        tcg_abort(); \
+        g_assert_not_reached(); \
     } while (0)
 
 
@@ -157,11 +157,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_brcond_i64:
         return C_O0_I2(r, r);
 
-    case INDEX_op_qemu_ld_i32:
-    case INDEX_op_qemu_ld_i64:
+    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_a64_i32:
+    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_a64_i64:
         return C_O1_I2(r, r, r);
-    case INDEX_op_qemu_st_i32:
-    case INDEX_op_qemu_st_i64:
+    case INDEX_op_qemu_st_a32_i32:
+    case INDEX_op_qemu_st_a64_i32:
+    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_a64_i64:
         return C_O0_I3(r, r, r);
 
     //
@@ -290,19 +294,14 @@ static const char *const tcg_target_reg_names[TCG_TARGET_GP_REGS] = {
  * Macro that defines a look-up tree for named QEMU_LD gadgets.
  */
 #define LD_MEMOP_LOOKUP(variable, arg, suffix) \
-    switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
+    switch (get_memop(arg) & MO_SSIZE) { \
         case MO_UB:   variable = gadget_qemu_ld_ub_   ## suffix; break; \
         case MO_SB:   variable = gadget_qemu_ld_sb_   ## suffix; break; \
-        case MO_LEUW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
-        case MO_LESW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
-        case MO_LEUL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
-        case MO_LESL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
-        case MO_LEUQ:  variable = gadget_qemu_ld_leq_  ## suffix; break; \
-        case MO_BEUW: variable = gadget_qemu_ld_beuw_ ## suffix; break; \
-        case MO_BESW: variable = gadget_qemu_ld_besw_ ## suffix; break; \
-        case MO_BEUL: variable = gadget_qemu_ld_beul_ ## suffix; break; \
-        case MO_BESL: variable = gadget_qemu_ld_besl_ ## suffix; break; \
-        case MO_BEUQ:  variable = gadget_qemu_ld_beq_  ## suffix; break; \
+        case MO_UW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
+        case MO_SW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
+        case MO_UL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
+        case MO_SL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
+        case MO_UQ:  variable = gadget_qemu_ld_leq_  ## suffix; break; \
         default: \
             g_assert_not_reached(); \
     }
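
Note: with the latest TCG, the backend no longer keys on the MO_BSWAP bit here; the lookup now dispatches on size and signedness (MO_SSIZE) alone, and the big-endian gadget cases are dropped (the ST_MEMOP_LOOKUP macro below changes the same way). A minimal standalone sketch of the resulting dispatch, using simplified stand-ins for QEMU's MemOp bit layout (MO_8..MO_64 in the low two bits, MO_SIGN at bit 2, hence MO_SSIZE == 0x7); the names are illustrative only:

    /* Standalone sketch; stand-ins for QEMU's MemOp bits, not its headers. */
    #include <stdio.h>

    enum {
        MO_8  = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3,
        MO_SIGN  = 4,               /* sign-extend the loaded value */
        MO_SSIZE = 7,               /* size | sign */
    };

    static const char *ld_gadget_for(unsigned memop)
    {
        switch (memop & MO_SSIZE) {
        case MO_8:            return "gadget_qemu_ld_ub";
        case MO_8  | MO_SIGN: return "gadget_qemu_ld_sb";
        case MO_16:           return "gadget_qemu_ld_leuw";
        case MO_16 | MO_SIGN: return "gadget_qemu_ld_lesw";
        case MO_32:           return "gadget_qemu_ld_leul";
        case MO_32 | MO_SIGN: return "gadget_qemu_ld_lesl";
        case MO_64:           return "gadget_qemu_ld_leq";
        default:              return NULL;  /* MO_64|MO_SIGN: not generated */
        }
    }

    int main(void)
    {
        /* MO_32 | MO_SIGN selects the sign-extending 32-bit load gadget. */
        printf("%s\n", ld_gadget_for(MO_32 | MO_SIGN));
        return 0;
    }
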
@@ -319,14 +318,11 @@ static const char *const tcg_target_reg_names[TCG_TARGET_GP_REGS] = {
  * Macro that defines a look-up tree for named QEMU_ST gadgets.
  */
 #define ST_MEMOP_LOOKUP(variable, arg, suffix) \
-    switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
+    switch (get_memop(arg) & MO_SSIZE) { \
         case MO_UB:   variable = gadget_qemu_st_ub_   ## suffix; break; \
-        case MO_LEUW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
-        case MO_LEUL: variable = gadget_qemu_st_leul_ ## suffix; break; \
-        case MO_LEUQ:  variable = gadget_qemu_st_leq_  ## suffix; break; \
-        case MO_BEUW: variable = gadget_qemu_st_beuw_ ## suffix; break; \
-        case MO_BEUL: variable = gadget_qemu_st_beul_ ## suffix; break; \
-        case MO_BEUQ:  variable = gadget_qemu_st_beq_  ## suffix; break; \
+        case MO_UW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
+        case MO_UL: variable = gadget_qemu_st_leul_ ## suffix; break; \
+        case MO_UQ:  variable = gadget_qemu_st_leq_  ## suffix; break; \
         default: \
             g_assert_not_reached(); \
     }
@@ -339,7 +335,7 @@ static const char *const tcg_target_reg_names[TCG_TARGET_GP_REGS] = {
 
 
 #define LOOKUP_SPECIAL_CASE_LDST_GADGET(arg, name, mode) \
-    switch(TLB_MASK_TABLE_OFS(get_mmuidx(arg))) { \
+    switch(tlb_mask_table_ofs(s, get_mmuidx(arg))) { \
         case -32:  \
             gadget = (a_bits >= s_bits) ?  \
                 gadget_qemu_ ## name ## _aligned_ ## mode ## _off32_i64 : \
@@ -426,35 +422,23 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
 #ifdef CONFIG_SOFTMMU
 
 // TODO: relocate these prototypes?
-tcg_target_ulong helper_ret_ldub_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr);
-tcg_target_ulong helper_le_lduw_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr);
-tcg_target_ulong helper_le_ldul_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr);
-tcg_target_ulong helper_be_lduw_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr);
-tcg_target_ulong helper_be_ldul_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_ldub_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_lduw_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr);
+tcg_target_ulong helper_ldul_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr);
 
-tcg_target_ulong helper_ret_ldub_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr)
+tcg_target_ulong helper_ldub_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr)
 {
-    return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
+    return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
 }
 
-tcg_target_ulong helper_le_lduw_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr)
+tcg_target_ulong helper_lduw_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr)
 {
-    return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
+    return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
 }
 
-tcg_target_ulong helper_le_ldul_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr)
+tcg_target_ulong helper_ldul_mmu_signed(CPUArchState *env, uint64_t addr, MemOpIdx oi, uintptr_t retaddr)
 {
-    return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
-}
-
-tcg_target_ulong helper_be_lduw_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr)
-{
-    return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
-}
-
-tcg_target_ulong helper_be_ldul_mmu_signed(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr)
-{
-    return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
+    return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
 }
 
 #else
@@ -708,6 +692,87 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg t0, tcg_target_long
     }
 }
 
+static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
+{
+    switch (type) {
+    case TCG_TYPE_I32:
+        tcg_debug_assert(TCG_TARGET_HAS_ext8s_i32);
+        tcg_out_binary_gadget(s, gadget_ext8s_i32, rd, rs);
+        break;
+#if TCG_TARGET_REG_BITS == 64
+    case TCG_TYPE_I64:
+        tcg_debug_assert(TCG_TARGET_HAS_ext8s_i64);
+        tcg_out_binary_gadget(s, gadget_ext8s_i64, rd, rs);
+        break;
+#endif
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_out_binary_gadget(s, gadget_ext8u, rd, rs);
+}
+
+static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
+{
+    switch (type) {
+    case TCG_TYPE_I32:
+        tcg_debug_assert(TCG_TARGET_HAS_ext16s_i32);
+        tcg_out_binary_gadget(s, gadget_ext16s_i32, rd, rs);
+        break;
+#if TCG_TARGET_REG_BITS == 64
+    case TCG_TYPE_I64:
+        tcg_debug_assert(TCG_TARGET_HAS_ext16s_i64);
+        tcg_out_binary_gadget(s, gadget_ext16s_i64, rd, rs);
+        break;
+#endif
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_out_binary_gadget(s, gadget_ext16u, rd, rs);
+}
+
+static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+    tcg_debug_assert(TCG_TARGET_HAS_ext32s_i64);
+    tcg_out_binary_gadget(s, gadget_ext32s_i64, rd, rs);
+}
+
+static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+    tcg_debug_assert(TCG_TARGET_HAS_ext32u_i64);
+    tcg_out_binary_gadget(s, gadget_ext32u_i64, rd, rs);
+}
+
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_out_ext32s(s, rd, rs);
+}
+
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_out_ext32u(s, rd, rs);
+}
+
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs)
+{
+    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+    tcg_out_binary_gadget(s, gadget_extrl, rd, rs);
+}
+
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+    return false;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                               tcg_target_long imm)
 {
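
Note: the sign/zero extensions are now emitted through these dedicated backend hooks rather than through INDEX_op_ext* cases in tcg_out_op (those cases are removed further down and marked unreachable). The semantics each hook must provide are plain integer extension; a standalone illustration in C, independent of TCTI's gadget machinery:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ext8s(uint64_t x)  { return (uint64_t)(int64_t)(int8_t)x; }
    static uint64_t ext8u(uint64_t x)  { return (uint8_t)x; }
    static uint64_t ext32s(uint64_t x) { return (uint64_t)(int64_t)(int32_t)x; }
    static uint64_t extrl(uint64_t x)  { return (uint32_t)x; }  /* low 32 bits */

    int main(void)
    {
        assert(ext8s(0x80)                  == 0xffffffffffffff80ull);
        assert(ext8u(0x1ff)                 == 0xffull);
        assert(ext32s(0x80000000ull)        == 0xffffffff80000000ull);
        assert(extrl(0x1122334455667788ull) == 0x55667788ull);
        return 0;
    }
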
@@ -763,6 +828,9 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
     set_jmp_reset_offset(s, which);
 }
 
+/* We expect to use a 7-bit scaled negative offset from ENV.  */
+#define MIN_TLB_MASK_TABLE_OFS  -512
+
 /**
  * Generate every other operation.
  */
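
Note on the new MIN_TLB_MASK_TABLE_OFS: assuming the gadgets reach the TLB mask/table pair through an AArch64-style signed 7-bit immediate scaled by the 8-byte access size (as in LDP), the most negative encodable offset is -(1 << 6) * 8 = -512. A trivial standalone check:

    #include <assert.h>

    int main(void)
    {
        int min_imm7 = -(1 << 6);       /* most negative signed 7-bit value: -64 */
        assert(min_imm7 * 8 == -512);   /* matches MIN_TLB_MASK_TABLE_OFS above */
        return 0;
    }
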
@@ -1106,38 +1174,6 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         tcg_out_ternary_gadget(s, gadget_ctz_i64, args[0], args[1], args[2]);
         break;
 
-    case INDEX_op_ext8s_i64:    /* Optional (TCG_TARGET_HAS_ext8s_i64). */
-        tcg_out_binary_gadget(s, gadget_ext8s_i64, args[0], args[1]);
-        break;
-
-    case INDEX_op_ext8u_i32:    /* Optional (TCG_TARGET_HAS_ext8u_i32). */
-    case INDEX_op_ext8u_i64:    /* Optional (TCG_TARGET_HAS_ext8u_i64). */
-        tcg_out_binary_gadget(s, gadget_ext8u, args[0], args[1]);
-        break;
-
-    case INDEX_op_ext16s_i64:   /* Optional (TCG_TARGET_HAS_ext16s_i64). */
-        tcg_out_binary_gadget(s, gadget_ext16s_i64, args[0], args[1]);
-        break;
-
-    case INDEX_op_ext16u_i32:   /* Optional (TCG_TARGET_HAS_ext16u_i32). */
-    case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
-        tcg_out_binary_gadget(s, gadget_ext16u, args[0], args[1]);
-        break;
-
-    case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
-    case INDEX_op_ext_i32_i64:
-        tcg_out_binary_gadget(s, gadget_ext32s_i64, args[0], args[1]);
-        break;
-
-    case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
-    case INDEX_op_extu_i32_i64:
-        tcg_out_binary_gadget(s, gadget_ext32u_i64, args[0], args[1]);
-        break;
-
-    case INDEX_op_extrl_i64_i32:
-        tcg_out_binary_gadget(s, gadget_extrl, args[0], args[1]);
-        break;
-
     case INDEX_op_extrh_i64_i32:
         tcg_out_binary_gadget(s, gadget_extrh, args[0], args[1]);
         break;
@@ -1158,14 +1194,6 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         tcg_out_binary_gadget(s, gadget_not_i32, args[0], args[1]);
         break;
 
-    case INDEX_op_ext8s_i32:    /* Optional (TCG_TARGET_HAS_ext8s_i32). */
-        tcg_out_binary_gadget(s, gadget_ext8s_i32, args[0], args[1]);
-        break;
-
-    case INDEX_op_ext16s_i32:   /* Optional (TCG_TARGET_HAS_ext16s_i32). */
-        tcg_out_binary_gadget(s, gadget_ext16s_i32, args[0], args[1]);
-        break;
-
     case INDEX_op_div_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
         tcg_out_ternary_gadget(s, gadget_div_i32, args[0], args[1], args[2]);
         break;
@@ -1216,7 +1244,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         break;
     }
 
-    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_ld_a32_i32:
+    case INDEX_op_qemu_ld_a64_i32:
     {
         MemOp opc = get_memop(args[2]);
         unsigned a_bits = get_alignment_bits(opc);
@@ -1224,7 +1253,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
 
         void *gadget;
 
-        switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+        switch(tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
             case -32:  LD_MEMOP_HANDLER(gadget, args[2],  off32_i32, a_bits, s_bits); break;
             case -48:  LD_MEMOP_HANDLER(gadget, args[2],  off48_i32, a_bits, s_bits); break;
             case -64:  LD_MEMOP_HANDLER(gadget, args[2],  off64_i32, a_bits, s_bits); break;
@@ -1240,7 +1269,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         break;
     }
 
-    case INDEX_op_qemu_ld_i64:
+    case INDEX_op_qemu_ld_a32_i64:
+    case INDEX_op_qemu_ld_a64_i64:
     {
         MemOp opc = get_memop(args[2]);
         unsigned a_bits = get_alignment_bits(opc);
@@ -1262,7 +1292,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         }
         // Otherwise, handle the generic case.
         else {
-            switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+            switch(tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
                 case -32:  LD_MEMOP_HANDLER(gadget, args[2],  off32_i64, a_bits, s_bits); break;
                 case -48:  LD_MEMOP_HANDLER(gadget, args[2],  off48_i64, a_bits, s_bits); break;
                 case -64:  LD_MEMOP_HANDLER(gadget, args[2],  off64_i64, a_bits, s_bits); break;
@@ -1280,7 +1310,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         break;
     }
 
-    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_st_a32_i32:
+    case INDEX_op_qemu_st_a64_i32:
     {
         MemOp opc = get_memop(args[2]);
         unsigned a_bits = get_alignment_bits(opc);
@@ -1288,7 +1319,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
 
         void *gadget;
 
-        switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+        switch(tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
             case -32:  ST_MEMOP_HANDLER(gadget, args[2],  off32_i32, a_bits, s_bits); break;
             case -48:  ST_MEMOP_HANDLER(gadget, args[2],  off48_i32, a_bits, s_bits); break;
             case -64:  ST_MEMOP_HANDLER(gadget, args[2],  off64_i32, a_bits, s_bits); break;
@@ -1305,7 +1336,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         break;
     }
 
-    case INDEX_op_qemu_st_i64:
+    case INDEX_op_qemu_st_a32_i64:
+    case INDEX_op_qemu_st_a64_i64:
     {
         MemOp opc = get_memop(args[2]);
         unsigned a_bits = get_alignment_bits(opc);
@@ -1327,7 +1359,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
         }
         // Otherwise, handle the generic case.
         else {
-            switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+            switch(tlb_mask_table_ofs(s, get_mmuidx(args[2]))) {
                 case -32:  ST_MEMOP_HANDLER(gadget, args[2],  off32_i64, a_bits, s_bits); break;
                 case -48:  ST_MEMOP_HANDLER(gadget, args[2],  off48_i64, a_bits, s_bits); break;
                 case -64:  ST_MEMOP_HANDLER(gadget, args[2],  off64_i64, a_bits, s_bits); break;
@@ -1366,8 +1398,21 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
     case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
+    case INDEX_op_ext8s_i32:    /* Always emitted via tcg_reg_alloc_op. */
+    case INDEX_op_ext8s_i64:
+    case INDEX_op_ext8u_i32:
+    case INDEX_op_ext8u_i64:
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16s_i64:
+    case INDEX_op_ext16u_i32:
+    case INDEX_op_ext16u_i64:
+    case INDEX_op_ext32s_i64:
+    case INDEX_op_ext32u_i64:
+    case INDEX_op_ext_i32_i64:
+    case INDEX_op_extu_i32_i64:
+    case INDEX_op_extrl_i64_i32:
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
@@ -1391,7 +1436,8 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 }
 
 /* Test if a constant matches the constraint. */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, int ct,
+                                   TCGType type, TCGCond cond, int vece)
 {
     return ct & TCG_CT_CONST;
 }
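
Note: upstream widened this hook so a backend can also judge a constant against the comparison condition (cond) and vector element size (vece); TCTI accepts any constant wherever TCG_CT_CONST is set and ignores the extra parameters. A standalone sketch, with stand-in types and flag values plus one hypothetical extra constraint, of how a backend that does restrict constants might use the wider signature:

    #include <stdbool.h>
    #include <stdint.h>

    typedef int TCGType;                /* stand-in, not QEMU's definition */
    typedef int TCGCond;                /* stand-in */
    #define TCG_CT_CONST       0x100    /* "any constant"; value illustrative */
    #define TCG_CT_CONST_ZERO  0x200    /* hypothetical "zero only" flag */

    static bool example_const_match(int64_t val, int ct,
                                    TCGType type, TCGCond cond, int vece)
    {
        (void)type; (void)cond; (void)vece;     /* unused, as in TCTI */
        if (ct & TCG_CT_CONST) {
            return true;                        /* any constant matches */
        }
        if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
            return true;                        /* e.g. a zero-register operand */
        }
        return false;
    }

    int main(void)
    {
        return example_const_match(0, TCG_CT_CONST_ZERO, 0, 0, 0) ? 0 : 1;
    }
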
@@ -2033,6 +2079,15 @@ static inline void tcg_target_qemu_prologue(TCGContext *s)
     // No prologue; as we're interpreted.
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
+bool tcg_target_has_memory_bswap(MemOp memop)
+{
+    return true;
+}
 
 /**
  * TCTI 'interpreter' bootstrap.

+ 6 - 12
tcg/aarch64-tcti/tcg-target.h

@@ -40,16 +40,7 @@
 #ifndef TCG_TARGET_H
 #define TCG_TARGET_H
 
-#if UINTPTR_MAX == UINT32_MAX
-# error We only support AArch64 running in 64B mode.
-#elif UINTPTR_MAX == UINT64_MAX
-# define TCG_TARGET_REG_BITS 64
-#else
-# error Unknown pointer size for tcti target
-#endif
-
 #define TCG_TARGET_INSN_UNIT_SIZE        1
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 #define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 
 // We're an interpreted target; even if we're JIT-compiling to our interpreter's
@@ -77,15 +68,14 @@
 #define TCG_TARGET_HAS_ext8u_i64        1
 #define TCG_TARGET_HAS_ext16u_i64       1
 #define TCG_TARGET_HAS_ext32u_i64       1
+#define TCG_TARGET_HAS_extr_i64_i32     0
 
 // Register extractions.
 #define TCG_TARGET_HAS_extrl_i64_i32    1
 #define TCG_TARGET_HAS_extrh_i64_i32    1
 
 // Negations.
-#define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_not_i64          1
 
 // Logicals.
@@ -93,6 +83,8 @@
 #define TCG_TARGET_HAS_orc_i32          1
 #define TCG_TARGET_HAS_eqv_i32          1
 #define TCG_TARGET_HAS_rot_i32          1
+#define TCG_TARGET_HAS_negsetcond_i32   0
+#define TCG_TARGET_HAS_negsetcond_i64   0
 #define TCG_TARGET_HAS_nand_i32         1
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_andc_i64         1
@@ -107,6 +99,7 @@
 #define TCG_TARGET_HAS_ctz_i32          1
 #define TCG_TARGET_HAS_clz_i64          1
 #define TCG_TARGET_HAS_ctz_i64          1
+#define TCG_TARGET_HAS_tst              0
 
 // Swaps.
 #define TCG_TARGET_HAS_bswap16_i32      1
@@ -114,7 +107,6 @@
 #define TCG_TARGET_HAS_bswap16_i64      1
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1
-#define TCG_TARGET_HAS_MEMORY_BSWAP     1
 
 //
 // Supported optional vector instructions.
@@ -143,6 +135,7 @@
 #define TCG_TARGET_HAS_minmax_vec       1
 #define TCG_TARGET_HAS_bitsel_vec       1
 #define TCG_TARGET_HAS_cmpsel_vec       0
+#define TCG_TARGET_HAS_tst_vec          0
 
 //
 // Unsupported instructions.
@@ -166,6 +159,7 @@
 // constraints for 8-bit loads and stores. We don't need to do so, so we'll leave
 // this unimplemented, as we gain nothing by it.
 #define TCG_TARGET_HAS_qemu_st8_i32     0
+#define TCG_TARGET_HAS_qemu_ldst_i128   0
 
 // These should always be zero on our 64B platform.
 #define TCG_TARGET_HAS_muls2_i64        0

+ 17 - 43
tcg/aarch64-tcti/tcti-gadget-gen.py

@@ -931,68 +931,52 @@ def vector_logic_dnm(name, operation):
 
     START_COLLECTION(f"qemu_ld_{subtype}_unsigned_le")
 
-    ld_thunk(f"qemu_ld_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu",
+    ld_thunk(f"qemu_ld_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ldub_mmu",
         fastpath_32b=["ldrb Wd, [Xn, x27]"], fastpath_64b=["ldrb Wd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    ld_thunk(f"qemu_ld_leuw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu",
+    ld_thunk(f"qemu_ld_leuw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_lduw_mmu",
         fastpath_32b=["ldrh Wd, [Xn, x27]"], fastpath_64b=["ldrh Wd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    ld_thunk(f"qemu_ld_leul_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldul_mmu",
+    ld_thunk(f"qemu_ld_leul_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ldul_mmu",
         fastpath_32b=["ldr Wd, [Xn, x27]"], fastpath_64b=["ldr Wd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    ld_thunk(f"qemu_ld_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
+    ld_thunk(f"qemu_ld_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ldq_mmu",
         fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
 
     START_COLLECTION(f"qemu_ld_{subtype}_signed_le")
 
-    ld_thunk(f"qemu_ld_sb_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu_signed",
+    ld_thunk(f"qemu_ld_sb_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ldub_mmu_signed",
         fastpath_32b=["ldrsb Wd, [Xn, x27]"], fastpath_64b=["ldrsb Xd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    ld_thunk(f"qemu_ld_lesw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu_signed",
+    ld_thunk(f"qemu_ld_lesw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_lduw_mmu_signed",
         fastpath_32b=["ldrsh Wd, [Xn, x27]"], fastpath_64b=["ldrsh Xd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    ld_thunk(f"qemu_ld_lesl_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldul_mmu_signed",
+    ld_thunk(f"qemu_ld_lesl_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ldul_mmu_signed",
         fastpath_32b=["ldrsw Xd, [Xn, x27]"], fastpath_64b=["ldrsw Xd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
 
     # Special variant for the most common modes, as a speedup optimization.
-    ld_thunk(f"qemu_ld_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu",
+    ld_thunk(f"qemu_ld_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ldub_mmu",
         fastpath_32b=["ldrb Wd, [Xn, x27]"], fastpath_64b=["ldrb Wd, [Xn, x27]"],
         force_slowpath=is_slowpath, immediate=0x02
     )
-    ld_thunk(f"qemu_ld_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
+    ld_thunk(f"qemu_ld_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_ldq_mmu",
         fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
         force_slowpath=is_slowpath, immediate=0x32
     )
-    ld_thunk(f"qemu_ld_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
+    ld_thunk(f"qemu_ld_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_ldq_mmu",
         fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
         force_slowpath=is_slowpath, immediate=0x3a
     )
 
-    START_COLLECTION(f"qemu_ld_{subtype}_be")
-
-    # For now, leave the rare/big-endian stuff slow-path only.
-    ld_thunk(f"qemu_ld_beuw_{subtype}", None, None, "helper_be_lduw_mmu",         
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-    ld_thunk(f"qemu_ld_besw_{subtype}", None, None, "helper_be_lduw_mmu_signed",  
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-    ld_thunk(f"qemu_ld_beul_{subtype}", None, None, "helper_be_ldul_mmu",         
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-    ld_thunk(f"qemu_ld_besl_{subtype}", None, None, "helper_be_ldul_mmu_signed",  
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-    ld_thunk(f"qemu_ld_beq_{subtype}",  None, None, "helper_be_ldq_mmu",          
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-
-
-
 
 # Handlers for QEMU_ST, which handles guest -> host stores.
 for subtype in ('aligned', 'unaligned', 'slowpath'):
@@ -1001,47 +985,37 @@ def vector_logic_dnm(name, operation):
 
     START_COLLECTION(f"qemu_st_{subtype}_le")
 
-    st_thunk(f"qemu_st_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_stb_mmu",
+    st_thunk(f"qemu_st_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_stb_mmu",
         fastpath_32b=["strb Wd, [Xn, x27]"], fastpath_64b=["strb Wd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    st_thunk(f"qemu_st_leuw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_stw_mmu",
+    st_thunk(f"qemu_st_leuw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_stw_mmu",
         fastpath_32b=["strh Wd, [Xn, x27]"], fastpath_64b=["strh Wd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    st_thunk(f"qemu_st_leul_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_stl_mmu",
+    st_thunk(f"qemu_st_leul_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_stl_mmu",
         fastpath_32b=["str Wd, [Xn, x27]"], fastpath_64b=["str Wd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
-    st_thunk(f"qemu_st_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
+    st_thunk(f"qemu_st_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_stq_mmu",
         fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
         force_slowpath=is_slowpath,
     )
     
     # Special optimization for the most common modes.
-    st_thunk(f"qemu_st_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ret_stb_mmu",
+    st_thunk(f"qemu_st_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_stb_mmu",
         fastpath_32b=["strb Wd, [Xn, x27]"], fastpath_64b=["strb Wd, [Xn, x27]"],
         force_slowpath=is_slowpath, immediate=0x02
     )
-    st_thunk(f"qemu_st_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
+    st_thunk(f"qemu_st_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_stq_mmu",
         fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
         force_slowpath=is_slowpath, immediate=0x32
     )
-    st_thunk(f"qemu_st_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
+    st_thunk(f"qemu_st_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_stq_mmu",
         fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
         force_slowpath=is_slowpath, immediate=0x3a
     )
 
-    START_COLLECTION(f"qemu_st_{subtype}_be")
-
-    # For now, leave the rare/big-endian stuff slow-path only.
-    st_thunk(f"qemu_st_beuw_{subtype}", None, None, "helper_be_stw_mmu",  
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-    st_thunk(f"qemu_st_beul_{subtype}", None, None, "helper_be_stl_mmu",
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-    st_thunk(f"qemu_st_beq_{subtype}",  None, None, "helper_be_stq_mmu",
-            is_aligned=is_aligned, force_slowpath=is_slowpath)
-
 
 #
 # SIMD/Vector ops

+ 0 - 12
tcg/meson.build

@@ -68,24 +68,12 @@ if get_option('tcg_threaded_interpreter')
       'tcti_qemu_ld_unaligned_unsigned_le_gadgets.h',
       'tcti_qemu_ld_slowpath_unsigned_le_gadgets.c',
       'tcti_qemu_ld_slowpath_unsigned_le_gadgets.h',
-      'tcti_qemu_ld_aligned_be_gadgets.c',
-      'tcti_qemu_ld_aligned_be_gadgets.h',
-      'tcti_qemu_ld_unaligned_be_gadgets.c',
-      'tcti_qemu_ld_unaligned_be_gadgets.h',
-      'tcti_qemu_ld_slowpath_be_gadgets.c',
-      'tcti_qemu_ld_slowpath_be_gadgets.h',
       'tcti_qemu_st_aligned_le_gadgets.c',
       'tcti_qemu_st_aligned_le_gadgets.h',
       'tcti_qemu_st_unaligned_le_gadgets.c',
       'tcti_qemu_st_unaligned_le_gadgets.h',
       'tcti_qemu_st_slowpath_le_gadgets.c',
       'tcti_qemu_st_slowpath_le_gadgets.h',
-      'tcti_qemu_st_aligned_be_gadgets.c',
-      'tcti_qemu_st_aligned_be_gadgets.h',
-      'tcti_qemu_st_unaligned_be_gadgets.c',
-      'tcti_qemu_st_unaligned_be_gadgets.h',
-      'tcti_qemu_st_slowpath_be_gadgets.c',
-      'tcti_qemu_st_slowpath_be_gadgets.h',
       'tcti_simd_base_gadgets.c',
       'tcti_simd_base_gadgets.h',
       'tcti_simd_arithmetic_gadgets.c',

+ 1 - 1
tcg/tcg.c

@@ -755,7 +755,7 @@ static const TCGTargetOpDef constraint_sets[] = {
 
 #include "tcg-target.c.inc"
 
-#ifndef CONFIG_TCG_INTERPRETER
+#if !defined(CONFIG_TCG_INTERPRETER) && !defined(CONFIG_TCG_THREADED_INTERPRETER)
 /* Validate CPUTLBDescFast placement. */
 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                         sizeof(CPUNegativeOffsetState))