Selaa lähdekoodia

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20150212' into staging

Convert the TCG opcode buffer to a linked list.

# gpg: Signature made Fri 13 Feb 2015 05:40:41 GMT using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-tcg-20150212:
  tcg: Remove unused opcodes
  tcg: Implement insert_op_before
  tcg: Remove opcodes instead of noping them out
  tcg: Put opcodes in a linked list
  tcg: Introduce tcg_op_buf_count and tcg_op_buf_full
  tcg: Move emit of INDEX_op_end into gen_tb_end
  tcg: Reduce ifdefs in tcg-op.c
  tcg: Move some opcode generation functions out of line

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 10 vuotta sitten
vanhempi
commit
cd2d554127

+ 1 - 1
Makefile.target

@@ -83,7 +83,7 @@ all: $(PROGS) stap
 #########################################################
 # cpu emulator library
 obj-y = exec.o translate-all.o cpu-exec.o
-obj-y += tcg/tcg.o tcg/optimize.o
+obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
 obj-$(CONFIG_TCG_INTERPRETER) += tci.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o

+ 17 - 5
include/exec/gen-icount.h

@@ -11,8 +11,8 @@ static int exitreq_label;
 
 static inline void gen_tb_start(TranslationBlock *tb)
 {
-    TCGv_i32 count;
-    TCGv_i32 flag;
+    TCGv_i32 count, flag, imm;
+    int i;
 
     exitreq_label = gen_new_label();
     flag = tcg_temp_new_i32();
@@ -21,16 +21,25 @@ static inline void gen_tb_start(TranslationBlock *tb)
     tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label);
     tcg_temp_free_i32(flag);
 
-    if (!(tb->cflags & CF_USE_ICOUNT))
+    if (!(tb->cflags & CF_USE_ICOUNT)) {
         return;
+    }
 
     icount_label = gen_new_label();
     count = tcg_temp_local_new_i32();
     tcg_gen_ld_i32(count, cpu_env,
                    -ENV_OFFSET + offsetof(CPUState, icount_decr.u32));
+
+    imm = tcg_temp_new_i32();
+    tcg_gen_movi_i32(imm, 0xdeadbeef);
+
     /* This is a horrid hack to allow fixing up the value later.  */
-    icount_arg = tcg_ctx.gen_opparam_ptr + 1;
-    tcg_gen_subi_i32(count, count, 0xdeadbeef);
+    i = tcg_ctx.gen_last_op_idx;
+    i = tcg_ctx.gen_op_buf[i].args;
+    icount_arg = &tcg_ctx.gen_opparam_buf[i + 1];
+
+    tcg_gen_sub_i32(count, count, imm);
+    tcg_temp_free_i32(imm);
 
     tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label);
     tcg_gen_st16_i32(count, cpu_env,
@@ -48,6 +57,9 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
         gen_set_label(icount_label);
         tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED);
     }
+
+    /* Terminate the linked list.  */
+    tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
 }
 
 static inline void gen_io_start(void)

+ 8 - 8
target-alpha/translate.c

@@ -2790,7 +2790,6 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
     target_ulong pc_start;
     target_ulong pc_mask;
     uint32_t insn;
-    uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int j, lj = -1;
     ExitStatus ret;
@@ -2798,7 +2797,6 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
     int max_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     ctx.tb = tb;
     ctx.pc = pc_start;
@@ -2839,11 +2837,12 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
-                while (lj < j)
+                while (lj < j) {
                     tcg_ctx.gen_opc_instr_start[lj++] = 0;
+                }
             }
             tcg_ctx.gen_opc_pc[lj] = ctx.pc;
             tcg_ctx.gen_opc_instr_start[lj] = 1;
@@ -2881,7 +2880,7 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
            or exhaust instruction count, stop generation.  */
         if (ret == NO_EXIT
             && ((ctx.pc & pc_mask) == 0
-                || tcg_ctx.gen_opc_ptr >= gen_opc_end
+                || tcg_op_buf_full()
                 || num_insns >= max_insns
                 || singlestep
                 || ctx.singlestep_enabled)) {
@@ -2912,12 +2911,13 @@ static inline void gen_intermediate_code_internal(AlphaCPU *cpu,
     }
 
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
-        while (lj <= j)
+        while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;
+        }
     } else {
         tb->size = ctx.pc - pc_start;
         tb->icount = num_insns;

+ 3 - 7
target-arm/translate-a64.c

@@ -10920,7 +10920,6 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     CPUARMState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     CPUBreakpoint *bp;
-    uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
     target_ulong next_page_start;
@@ -10931,8 +10930,6 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
 
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
     dc->singlestep_enabled = cs->singlestep_enabled;
@@ -11002,7 +10999,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
         }
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -11052,7 +11049,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
          * ensures prefetch aborts occur at the right place.
          */
         num_insns++;
-    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
+    } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              !dc->ss_active &&
@@ -11112,7 +11109,6 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
 
 done_generating:
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -11124,7 +11120,7 @@ done_generating:
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 3 - 7
target-arm/translate.c

@@ -11025,7 +11025,6 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     CPUARMState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     CPUBreakpoint *bp;
-    uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
     target_ulong next_page_start;
@@ -11046,8 +11045,6 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
 
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
     dc->singlestep_enabled = cs->singlestep_enabled;
@@ -11182,7 +11179,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -11248,7 +11245,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
         num_insns ++;
-    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
+    } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              !dc->ss_active &&
@@ -11357,7 +11354,6 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
 
 done_generating:
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -11369,7 +11365,7 @@ done_generating:
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 6 - 9
target-cris/translate.c

@@ -3116,7 +3116,6 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
 {
     CPUState *cs = CPU(cpu);
     CPUCRISState *env = &cpu->env;
-    uint16_t *gen_opc_end;
     uint32_t pc_start;
     unsigned int insn_len;
     int j, lj;
@@ -3142,8 +3141,6 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
     dc->cpu = cpu;
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     dc->ppc = pc_start;
     dc->pc = pc_start;
@@ -3207,7 +3204,7 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
         check_breakpoint(env, dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -3291,7 +3288,7 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
             break;
         }
     } while (!dc->is_jmp && !dc->cpustate_changed
-            && tcg_ctx.gen_opc_ptr < gen_opc_end
+            && !tcg_op_buf_full()
             && !singlestep
             && (dc->pc < next_page_start)
             && num_insns < max_insns);
@@ -3344,9 +3341,9 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
         }
     }
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;
@@ -3361,8 +3358,8 @@ gen_intermediate_code_internal(CRISCPU *cpu, TranslationBlock *tb,
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         log_target_disas(env, pc_start, dc->pc - pc_start,
                          env->pregs[PR_VR]);
-        qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf);
+        qemu_log("\nisize=%d osize=%d\n",
+                 dc->pc - pc_start, tcg_op_buf_count());
     }
 #endif
 #endif

+ 4 - 7
target-i386/translate.c

@@ -7913,7 +7913,6 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
     CPUX86State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     target_ulong pc_ptr;
-    uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int j, lj;
     uint64_t flags;
@@ -7993,8 +7992,6 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
     cpu_ptr1 = tcg_temp_new_ptr();
     cpu_cc_srcT = tcg_temp_local_new();
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
     lj = -1;
@@ -8015,7 +8012,7 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -8060,7 +8057,7 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
             break;
         }
         /* if too long translation, stop generation too */
-        if (tcg_ctx.gen_opc_ptr >= gen_opc_end ||
+        if (tcg_op_buf_full() ||
             (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
             num_insns >= max_insns) {
             gen_jmp_im(pc_ptr - dc->cs_base);
@@ -8077,10 +8074,10 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
         gen_io_end();
 done_generating:
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     /* we don't forget to fill the last values */
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 6 - 10
target-lm32/translate.c

@@ -1062,7 +1062,6 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
     CPUState *cs = CPU(cpu);
     CPULM32State *env = &cpu->env;
     struct DisasContext ctx, *dc = &ctx;
-    uint16_t *gen_opc_end;
     uint32_t pc_start;
     int j, lj;
     uint32_t next_page_start;
@@ -1075,8 +1074,6 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
     dc->num_watchpoints = cpu->num_watchpoints;
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
     dc->singlestep_enabled = cs->singlestep_enabled;
@@ -1100,7 +1097,7 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
         check_breakpoint(env, dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -1124,7 +1121,7 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
         num_insns++;
 
     } while (!dc->is_jmp
-         && tcg_ctx.gen_opc_ptr < gen_opc_end
+         && !tcg_op_buf_full()
          && !cs->singlestep_enabled
          && !singlestep
          && (dc->pc < next_page_start)
@@ -1158,9 +1155,9 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
     }
 
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;
@@ -1174,9 +1171,8 @@ void gen_intermediate_code_internal(LM32CPU *cpu,
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("\n");
         log_target_disas(env, pc_start, dc->pc - pc_start, 0);
-        qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr -
-            tcg_ctx.gen_opc_buf);
+        qemu_log("\nisize=%d osize=%d\n",
+                 dc->pc - pc_start, tcg_op_buf_count());
     }
 #endif
 }

+ 3 - 7
target-m68k/translate.c

@@ -2980,7 +2980,6 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
     CPUState *cs = CPU(cpu);
     CPUM68KState *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
-    uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int j, lj;
     target_ulong pc_start;
@@ -2993,8 +2992,6 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
 
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->env = env;
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
@@ -3026,7 +3023,7 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
                 break;
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -3041,7 +3038,7 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
         dc->insn_pc = dc->pc;
 	disas_m68k_insn(env, dc);
         num_insns++;
-    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
+    } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              (pc_offset) < (TARGET_PAGE_SIZE - 32) &&
@@ -3075,7 +3072,6 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
         }
     }
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -3086,7 +3082,7 @@ gen_intermediate_code_internal(M68kCPU *cpu, TranslationBlock *tb,
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 9 - 13
target-microblaze/translate.c

@@ -1673,7 +1673,6 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
 {
     CPUState *cs = CPU(cpu);
     CPUMBState *env = &cpu->env;
-    uint16_t *gen_opc_end;
     uint32_t pc_start;
     int j, lj;
     struct DisasContext ctx;
@@ -1688,8 +1687,6 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
     dc->tb = tb;
     org_flags = dc->synced_flags = dc->tb_flags = tb->flags;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     dc->jmp = 0;
     dc->delayed_branch = !!(dc->tb_flags & D_FLAG);
@@ -1732,7 +1729,7 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
         check_breakpoint(env, dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -1795,10 +1792,10 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
             break;
         }
     } while (!dc->is_jmp && !dc->cpustate_changed
-         && tcg_ctx.gen_opc_ptr < gen_opc_end
-                 && !singlestep
-         && (dc->pc < next_page_start)
-                 && num_insns < max_insns);
+             && !tcg_op_buf_full()
+             && !singlestep
+             && (dc->pc < next_page_start)
+             && num_insns < max_insns);
 
     npc = dc->pc;
     if (dc->jmp == JMP_DIRECT || dc->jmp == JMP_DIRECT_CC) {
@@ -1846,9 +1843,9 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
         }
     }
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;
@@ -1864,9 +1861,8 @@ gen_intermediate_code_internal(MicroBlazeCPU *cpu, TranslationBlock *tb,
 #if DISAS_GNU
         log_target_disas(env, pc_start, dc->pc - pc_start, 0);
 #endif
-        qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr -
-            tcg_ctx.gen_opc_buf);
+        qemu_log("\nisize=%d osize=%d\n",
+                 dc->pc - pc_start, tcg_op_buf_count());
     }
 #endif
 #endif

+ 4 - 6
target-mips/translate.c

@@ -19095,7 +19095,6 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
     CPUMIPSState *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
-    uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int j, lj = -1;
     int num_insns;
@@ -19107,7 +19106,6 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
         qemu_log("search pc %d\n", search_pc);
 
     pc_start = tb->pc;
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
     ctx.singlestep_enabled = cs->singlestep_enabled;
@@ -19151,7 +19149,7 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
         }
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -19209,7 +19207,7 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
         if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
             break;
 
-        if (tcg_ctx.gen_opc_ptr >= gen_opc_end) {
+        if (tcg_op_buf_full()) {
             break;
         }
 
@@ -19244,9 +19242,9 @@ gen_intermediate_code_internal(MIPSCPU *cpu, TranslationBlock *tb,
     }
 done_generating:
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 4 - 6
target-moxie/translate.c

@@ -827,14 +827,12 @@ gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
     CPUState *cs = CPU(cpu);
     DisasContext ctx;
     target_ulong pc_start;
-    uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int j, lj = -1;
     CPUMoxieState *env = &cpu->env;
     int num_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
     ctx.tb = tb;
@@ -857,7 +855,7 @@ gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
         }
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -879,7 +877,7 @@ gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
         if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) {
             break;
         }
-    } while (ctx.bstate == BS_NONE && tcg_ctx.gen_opc_ptr < gen_opc_end);
+    } while (ctx.bstate == BS_NONE && !tcg_op_buf_full());
 
     if (cs->singlestep_enabled) {
         tcg_gen_movi_tl(cpu_pc, ctx.pc);
@@ -900,9 +898,9 @@ gen_intermediate_code_internal(MoxieCPU *cpu, TranslationBlock *tb,
     }
  done_generating:
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 6 - 9
target-openrisc/translate.c

@@ -1642,7 +1642,6 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
 {
     CPUState *cs = CPU(cpu);
     struct DisasContext ctx, *dc = &ctx;
-    uint16_t *gen_opc_end;
     uint32_t pc_start;
     int j, k;
     uint32_t next_page_start;
@@ -1652,7 +1651,6 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     pc_start = tb->pc;
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     dc->is_jmp = DISAS_NEXT;
     dc->ppc = pc_start;
     dc->pc = pc_start;
@@ -1680,7 +1678,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     do {
         check_breakpoint(cpu, dc);
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (k < j) {
                 k++;
                 while (k < j) {
@@ -1721,7 +1719,7 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
             }
         }
     } while (!dc->is_jmp
-             && tcg_ctx.gen_opc_ptr < gen_opc_end
+             && !tcg_op_buf_full()
              && !cs->singlestep_enabled
              && !singlestep
              && (dc->pc < next_page_start)
@@ -1759,9 +1757,9 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     }
 
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         k++;
         while (k <= j) {
             tcg_ctx.gen_opc_instr_start[k++] = 0;
@@ -1775,9 +1773,8 @@ static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("\n");
         log_target_disas(&cpu->env, pc_start, dc->pc - pc_start, 0);
-        qemu_log("\nisize=%d osize=%td\n",
-            dc->pc - pc_start, tcg_ctx.gen_opc_ptr -
-            tcg_ctx.gen_opc_buf);
+        qemu_log("\nisize=%d osize=%d\n",
+                 dc->pc - pc_start, tcg_op_buf_count());
     }
 #endif
 }

+ 4 - 7
target-ppc/translate.c

@@ -11415,14 +11415,12 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
     DisasContext ctx, *ctxp = &ctx;
     opc_handler_t **table, *handler;
     target_ulong pc_start;
-    uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int j, lj = -1;
     int num_insns;
     int max_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.nip = pc_start;
     ctx.tb = tb;
     ctx.exception = POWERPC_EXCP_NONE;
@@ -11481,8 +11479,7 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
     gen_tb_start(tb);
     tcg_clear_temp_count();
     /* Set env in case of segfault during code fetch */
-    while (ctx.exception == POWERPC_EXCP_NONE
-            && tcg_ctx.gen_opc_ptr < gen_opc_end) {
+    while (ctx.exception == POWERPC_EXCP_NONE && !tcg_op_buf_full()) {
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == ctx.nip) {
@@ -11492,7 +11489,7 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
             }
         }
         if (unlikely(search_pc)) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -11598,9 +11595,9 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
         tcg_gen_exit_tb(0);
     }
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (unlikely(search_pc)) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 4 - 7
target-s390x/translate.c

@@ -4832,7 +4832,6 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
     DisasContext dc;
     target_ulong pc_start;
     uint64_t next_page_start;
-    uint16_t *gen_opc_end;
     int j, lj = -1;
     int num_insns, max_insns;
     CPUBreakpoint *bp;
@@ -4851,8 +4850,6 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
     dc.cc_op = CC_OP_DYNAMIC;
     do_debug = dc.singlestep_enabled = cs->singlestep_enabled;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
 
     num_insns = 0;
@@ -4865,7 +4862,7 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
 
     do {
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -4903,7 +4900,7 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
            or exhaust instruction count, stop generation.  */
         if (status == NO_EXIT
             && (dc.pc >= next_page_start
-                || tcg_ctx.gen_opc_ptr >= gen_opc_end
+                || tcg_op_buf_full()
                 || num_insns >= max_insns
                 || singlestep
                 || cs->singlestep_enabled)) {
@@ -4938,9 +4935,9 @@ static inline void gen_intermediate_code_internal(S390CPU *cpu,
     }
 
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 4 - 6
target-sh4/translate.c

@@ -1865,14 +1865,12 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
     CPUSH4State *env = &cpu->env;
     DisasContext ctx;
     target_ulong pc_start;
-    static uint16_t *gen_opc_end;
     CPUBreakpoint *bp;
     int i, ii;
     int num_insns;
     int max_insns;
 
     pc_start = tb->pc;
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.flags = (uint32_t)tb->flags;
     ctx.bstate = BS_NONE;
@@ -1891,7 +1889,7 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
     if (max_insns == 0)
         max_insns = CF_COUNT_MASK;
     gen_tb_start(tb);
-    while (ctx.bstate == BS_NONE && tcg_ctx.gen_opc_ptr < gen_opc_end) {
+    while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (ctx.pc == bp->pc) {
@@ -1904,7 +1902,7 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
 	    }
 	}
         if (search_pc) {
-            i = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            i = tcg_op_buf_count();
             if (ii < i) {
                 ii++;
                 while (ii < i)
@@ -1962,9 +1960,9 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb,
     }
 
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (search_pc) {
-        i = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        i = tcg_op_buf_count();
         ii++;
         while (ii <= i)
             tcg_ctx.gen_opc_instr_start[ii++] = 0;

+ 4 - 6
target-sparc/translate.c

@@ -5223,7 +5223,6 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
     CPUState *cs = CPU(cpu);
     CPUSPARCState *env = &cpu->env;
     target_ulong pc_start, last_pc;
-    uint16_t *gen_opc_end;
     DisasContext dc1, *dc = &dc1;
     CPUBreakpoint *bp;
     int j, lj = -1;
@@ -5243,7 +5242,6 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
     dc->fpu_enabled = tb_fpu_enabled(tb->flags);
     dc->address_mask_32bit = tb_am_enabled(tb->flags);
     dc->singlestep = (cs->singlestep_enabled || singlestep);
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
@@ -5265,7 +5263,7 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
         }
         if (spc) {
             qemu_log("Search PC...\n");
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -5298,7 +5296,7 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
         if (dc->singlestep) {
             break;
         }
-    } while ((tcg_ctx.gen_opc_ptr < gen_opc_end) &&
+    } while (!tcg_op_buf_full() &&
              (dc->pc - pc_start) < (TARGET_PAGE_SIZE - 32) &&
              num_insns < max_insns);
 
@@ -5320,9 +5318,9 @@ static inline void gen_intermediate_code_internal(SPARCCPU *cpu,
         }
     }
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
+
     if (spc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j)
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 1 - 4
target-tricore/translate.c

@@ -5500,7 +5500,6 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
     DisasContext ctx;
     target_ulong pc_start;
     int num_insns;
-    uint16_t *gen_opc_end;
 
     if (search_pc) {
         qemu_log("search pc %d\n", search_pc);
@@ -5508,7 +5507,6 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
 
     num_insns = 0;
     pc_start = tb->pc;
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     ctx.pc = pc_start;
     ctx.saved_pc = -1;
     ctx.tb = tb;
@@ -5524,7 +5522,7 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
 
         num_insns++;
 
-        if (tcg_ctx.gen_opc_ptr >= gen_opc_end) {
+        if (tcg_op_buf_full()) {
             gen_save_pc(ctx.next_pc);
             tcg_gen_exit_tb(0);
             break;
@@ -5538,7 +5536,6 @@ gen_intermediate_code_internal(TriCoreCPU *cpu, struct TranslationBlock *tb,
     }
 
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         printf("done_generating search pc\n");
     } else {

+ 3 - 7
target-unicore32/translate.c

@@ -1877,7 +1877,6 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
     CPUUniCore32State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     CPUBreakpoint *bp;
-    uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
     uint32_t next_page_start;
@@ -1891,8 +1890,6 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
 
     dc->tb = tb;
 
-    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
-
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
     dc->singlestep_enabled = cs->singlestep_enabled;
@@ -1933,7 +1930,7 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
             }
         }
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -1965,7 +1962,7 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
         num_insns++;
-    } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
+    } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              dc->pc < next_page_start &&
@@ -2037,7 +2034,6 @@ static inline void gen_intermediate_code_internal(UniCore32CPU *cpu,
 
 done_generating:
     gen_tb_end(tb, num_insns);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -2048,7 +2044,7 @@ done_generating:
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         lj++;
         while (lj <= j) {
             tcg_ctx.gen_opc_instr_start[lj++] = 0;

+ 3 - 5
target-xtensa/translate.c

@@ -3021,7 +3021,6 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
     DisasContext dc;
     int insn_count = 0;
     int j, lj = -1;
-    uint16_t *gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
     int max_insns = tb->cflags & CF_COUNT_MASK;
     uint32_t pc_start = tb->pc;
     uint32_t next_page_start =
@@ -3065,7 +3064,7 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
         check_breakpoint(env, &dc);
 
         if (search_pc) {
-            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+            j = tcg_op_buf_count();
             if (lj < j) {
                 lj++;
                 while (lj < j) {
@@ -3117,7 +3116,7 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
             insn_count < max_insns &&
             dc.pc < next_page_start &&
             dc.pc + xtensa_insn_len(env, &dc) <= next_page_start &&
-            tcg_ctx.gen_opc_ptr < gen_opc_end);
+            !tcg_op_buf_full());
 
     reset_litbase(&dc);
     reset_sar_tracker(&dc);
@@ -3133,7 +3132,6 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
         gen_jumpi(&dc, dc.pc, 0);
     }
     gen_tb_end(tb, insn_count);
-    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -3144,7 +3142,7 @@ void gen_intermediate_code_internal(XtensaCPU *cpu,
     }
 #endif
     if (search_pc) {
-        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        j = tcg_op_buf_count();
         memset(tcg_ctx.gen_opc_instr_start + lj + 1, 0,
                 (j - lj) * sizeof(tcg_ctx.gen_opc_instr_start[0]));
     } else {

+ 133 - 174
tcg/optimize.c

@@ -67,6 +67,37 @@ static void reset_temp(TCGArg temp)
     temps[temp].mask = -1;
 }
 
+static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
+                                TCGOpcode opc, int nargs)
+{ /* Allocate an OPC op with NARGS parameter slots and link it before OLD_OP. */
+    int oi = s->gen_next_op_idx;  /* op slot the new op will occupy */
+    int pi = s->gen_next_parm_idx;  /* first parameter slot of the new op */
+    int prev = old_op->prev;  /* index preceding OLD_OP; negative if none */
+    int next = old_op - s->gen_op_buf;  /* index of OLD_OP itself */
+    TCGOp *new_op;
+
+    tcg_debug_assert(oi < OPC_BUF_SIZE);  /* one more op must fit */
+    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);  /* as must its args */
+    s->gen_next_op_idx = oi + 1;
+    s->gen_next_parm_idx = pi + nargs;
+
+    new_op = &s->gen_op_buf[oi];
+    *new_op = (TCGOp){  /* parameter slots are reserved, not written, here */
+        .opc = opc,
+        .args = pi,
+        .prev = prev,
+        .next = next
+    };
+    if (prev >= 0) {
+        s->gen_op_buf[prev].next = oi;  /* old predecessor now leads to us */
+    } else {
+        s->gen_first_op_idx = oi;  /* OLD_OP was the head; we replace it */
+    }
+    old_op->prev = oi;
+
+    return new_op;
+}
+
 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
 static void reset_all_temps(int nb_temps)
 {
@@ -162,13 +193,13 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
     return false;
 }
 
-static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
+static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                             TCGOpcode old_op, TCGArg dst, TCGArg src)
 {
     TCGOpcode new_op = op_to_mov(old_op);
     tcg_target_ulong mask;
 
-    s->gen_opc_buf[op_index] = new_op;
+    op->opc = new_op;
 
     reset_temp(dst);
     mask = temps[src].mask;
@@ -193,17 +224,17 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
         temps[src].next_copy = dst;
     }
 
-    gen_args[0] = dst;
-    gen_args[1] = src;
+    args[0] = dst;
+    args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
+static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
                              TCGOpcode old_op, TCGArg dst, TCGArg val)
 {
     TCGOpcode new_op = op_to_movi(old_op);
     tcg_target_ulong mask;
 
-    s->gen_opc_buf[op_index] = new_op;
+    op->opc = new_op;
 
     reset_temp(dst);
     temps[dst].state = TCG_TEMP_CONST;
@@ -215,8 +246,8 @@ static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
     }
     temps[dst].mask = mask;
 
-    gen_args[0] = dst;
-    gen_args[1] = val;
+    args[0] = dst;
+    args[1] = val;
 }
 
 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
@@ -533,11 +564,9 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 }
 
 /* Propagate constants and copies, fold constant expressions. */
-static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
-                                    TCGArg *args, TCGOpDef *tcg_op_defs)
+static void tcg_constant_folding(TCGContext *s)
 {
-    int nb_ops, op_index, nb_temps, nb_globals;
-    TCGArg *gen_args;
+    int oi, oi_next, nb_temps, nb_globals;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -548,24 +577,23 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     nb_globals = s->nb_globals;
     reset_all_temps(nb_temps);
 
-    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
-    gen_args = args;
-    for (op_index = 0; op_index < nb_ops; op_index++) {
-        TCGOpcode op = s->gen_opc_buf[op_index];
-        const TCGOpDef *def = &tcg_op_defs[op];
+    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
         tcg_target_ulong mask, partmask, affected;
-        int nb_oargs, nb_iargs, nb_args, i;
+        int nb_oargs, nb_iargs, i;
         TCGArg tmp;
 
-        if (op == INDEX_op_call) {
-            *gen_args++ = tmp = *args++;
-            nb_oargs = tmp >> 16;
-            nb_iargs = tmp & 0xffff;
-            nb_args = nb_oargs + nb_iargs + def->nb_cargs;
+        TCGOp * const op = &s->gen_op_buf[oi];
+        TCGArg * const args = &s->gen_opparam_buf[op->args];
+        TCGOpcode opc = op->opc;
+        const TCGOpDef *def = &tcg_op_defs[opc];
+
+        oi_next = op->next;
+        if (opc == INDEX_op_call) {
+            nb_oargs = op->callo;
+            nb_iargs = op->calli;
         } else {
             nb_oargs = def->nb_oargs;
             nb_iargs = def->nb_iargs;
-            nb_args = def->nb_args;
         }
 
         /* Do copy propagation */
@@ -576,7 +604,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* For commutative operations make constant second argument */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(mul):
         CASE_OP_32_64(and):
@@ -634,7 +662,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
            and "sub r, 0, a => neg r, a" case.  */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(shl):
         CASE_OP_32_64(shr):
         CASE_OP_32_64(sar):
@@ -642,9 +670,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(rotr):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[1]].val == 0) {
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
-                args += 3;
-                gen_args += 2;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                 continue;
             }
             break;
@@ -657,7 +683,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     /* Proceed with possible constant folding. */
                     break;
                 }
-                if (op == INDEX_op_sub_i32) {
+                if (opc == INDEX_op_sub_i32) {
                     neg_op = INDEX_op_neg_i32;
                     have_neg = TCG_TARGET_HAS_neg_i32;
                 } else {
@@ -669,12 +695,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
                 if (temps[args[1]].state == TCG_TEMP_CONST
                     && temps[args[1]].val == 0) {
-                    s->gen_opc_buf[op_index] = neg_op;
+                    op->opc = neg_op;
                     reset_temp(args[0]);
-                    gen_args[0] = args[0];
-                    gen_args[1] = args[2];
-                    args += 3;
-                    gen_args += 2;
+                    args[1] = args[2];
                     continue;
                 }
             }
@@ -728,12 +751,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 if (!have_not) {
                     break;
                 }
-                s->gen_opc_buf[op_index] = not_op;
+                op->opc = not_op;
                 reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[i];
-                args += 3;
-                gen_args += 2;
+                args[1] = args[i];
                 continue;
             }
         default:
@@ -741,7 +761,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* Simplify expression for "op r, a, const => mov r, a" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(shl):
@@ -769,12 +789,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
         do_mov3:
             if (temps_are_copies(args[0], args[1])) {
-                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                tcg_op_remove(s, op);
             } else {
-                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
-                gen_args += 2;
+                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
             }
-            args += 3;
             continue;
         default:
             break;
@@ -784,7 +802,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
            output argument is supported. */
         mask = -1;
         affected = -1;
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(ext8s):
             if ((temps[args[1]].mask & 0x80) != 0) {
                 break;
@@ -923,38 +941,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         if (partmask == 0) {
             assert(nb_oargs == 1);
-            tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
-            args += nb_args;
-            gen_args += 2;
+            tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
             continue;
         }
         if (affected == 0) {
             assert(nb_oargs == 1);
             if (temps_are_copies(args[0], args[1])) {
-                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                tcg_op_remove(s, op);
             } else if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
-                gen_args += 2;
+                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
             } else {
-                tcg_opt_gen_movi(s, op_index, gen_args, op,
+                tcg_opt_gen_movi(s, op, args, opc,
                                  args[0], temps[args[1]].val);
-                gen_args += 2;
             }
-            args += nb_args;
             continue;
         }
 
         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(and):
         CASE_OP_32_64(mul):
         CASE_OP_32_64(muluh):
         CASE_OP_32_64(mulsh):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
-                args += 3;
-                gen_args += 2;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                 continue;
             }
             break;
@@ -963,18 +974,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* Simplify expression for "op r, a, a => mov r, a" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (temps_are_copies(args[1], args[2])) {
                 if (temps_are_copies(args[0], args[1])) {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 } else {
-                    tcg_opt_gen_mov(s, op_index, gen_args, op,
-                                    args[0], args[1]);
-                    gen_args += 2;
+                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
                 }
-                args += 3;
                 continue;
             }
             break;
@@ -983,14 +991,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         /* Simplify expression for "op r, a, a => movi r, 0" cases */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(andc):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(xor):
             if (temps_are_copies(args[1], args[2])) {
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
-                gen_args += 2;
-                args += 3;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
                 continue;
             }
             break;
@@ -1001,17 +1007,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         /* Propagate constants through copy operations and do constant
            folding.  Constants will be substituted to arguments by register
            allocator where needed and possible.  Also detect copies. */
-        switch (op) {
+        switch (opc) {
         CASE_OP_32_64(mov):
             if (temps_are_copies(args[0], args[1])) {
-                args += 2;
-                s->gen_opc_buf[op_index] = INDEX_op_nop;
+                tcg_op_remove(s, op);
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
-                gen_args += 2;
-                args += 2;
+                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
                 break;
             }
             /* Source argument is constant.  Rewrite the operation and
@@ -1019,9 +1022,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]);
-            gen_args += 2;
-            args += 2;
+            tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]);
             break;
 
         CASE_OP_32_64(not):
@@ -1033,20 +1034,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
-                gen_args += 2;
-                args += 2;
+                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
 
         case INDEX_op_trunc_shr_i32:
             if (temps[args[1]].state == TCG_TEMP_CONST) {
-                tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
-                gen_args += 2;
-                args += 3;
+                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1075,11 +1072,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         CASE_OP_32_64(remu):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
-                tmp = do_constant_folding(op, temps[args[1]].val,
+                tmp = do_constant_folding(opc, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
-                gen_args += 2;
-                args += 3;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
@@ -1089,54 +1084,44 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 && temps[args[2]].state == TCG_TEMP_CONST) {
                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                 temps[args[2]].val);
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
-                gen_args += 2;
-                args += 5;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
 
         CASE_OP_32_64(setcond):
-            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
             if (tmp != 2) {
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
-                gen_args += 2;
-                args += 4;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
                 break;
             }
             goto do_default;
 
         CASE_OP_32_64(brcond):
-            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
             if (tmp != 2) {
                 if (tmp) {
                     reset_all_temps(nb_temps);
-                    s->gen_opc_buf[op_index] = INDEX_op_br;
-                    gen_args[0] = args[3];
-                    gen_args += 1;
+                    op->opc = INDEX_op_br;
+                    args[0] = args[3];
                 } else {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 }
-                args += 4;
                 break;
             }
             goto do_default;
 
         CASE_OP_32_64(movcond):
-            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
             if (tmp != 2) {
                 if (temps_are_copies(args[0], args[4-tmp])) {
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
-                    tcg_opt_gen_movi(s, op_index, gen_args, op,
+                    tcg_opt_gen_movi(s, op, args, opc,
                                      args[0], temps[args[4-tmp]].val);
-                    gen_args += 2;
                 } else {
-                    tcg_opt_gen_mov(s, op_index, gen_args, op,
-                                    args[0], args[4-tmp]);
-                    gen_args += 2;
+                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]);
                 }
-                args += 6;
                 break;
             }
             goto do_default;
@@ -1154,24 +1139,22 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 uint64_t a = ((uint64_t)ah << 32) | al;
                 uint64_t b = ((uint64_t)bh << 32) | bl;
                 TCGArg rl, rh;
+                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
-                if (op == INDEX_op_add2_i32) {
+                if (opc == INDEX_op_add2_i32) {
                     a += b;
                 } else {
                     a -= b;
                 }
 
-                /* We emit the extra nop when we emit the add2/sub2.  */
-                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
-
                 rl = args[0];
                 rh = args[1];
-                tcg_opt_gen_movi(s, op_index, &gen_args[0],
-                                 op, rl, (uint32_t)a);
-                tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
-                                 op, rh, (uint32_t)(a >> 32));
-                gen_args += 4;
-                args += 6;
+                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
+                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
+
+                /* We've done all we need to do with the movi.  Skip it.  */
+                oi_next = op2->next;
                 break;
             }
             goto do_default;
@@ -1183,18 +1166,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 uint32_t b = temps[args[3]].val;
                 uint64_t r = (uint64_t)a * b;
                 TCGArg rl, rh;
-
-                /* We emit the extra nop when we emit the mulu2.  */
-                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
                 rl = args[0];
                 rh = args[1];
-                tcg_opt_gen_movi(s, op_index, &gen_args[0],
-                                 op, rl, (uint32_t)r);
-                tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
-                                 op, rh, (uint32_t)(r >> 32));
-                gen_args += 4;
-                args += 4;
+                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r);
+                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32));
+
+                /* We've done all we need to do with the movi.  Skip it.  */
+                oi_next = op2->next;
                 break;
             }
             goto do_default;
@@ -1205,12 +1186,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 if (tmp) {
             do_brcond_true:
                     reset_all_temps(nb_temps);
-                    s->gen_opc_buf[op_index] = INDEX_op_br;
-                    gen_args[0] = args[5];
-                    gen_args += 1;
+                    op->opc = INDEX_op_br;
+                    args[0] = args[5];
                 } else {
             do_brcond_false:
-                    s->gen_opc_buf[op_index] = INDEX_op_nop;
+                    tcg_op_remove(s, op);
                 }
             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                        && temps[args[2]].state == TCG_TEMP_CONST
@@ -1221,12 +1201,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                    vs the high word of the input.  */
             do_brcond_high:
                 reset_all_temps(nb_temps);
-                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
-                gen_args[0] = args[1];
-                gen_args[1] = args[3];
-                gen_args[2] = args[4];
-                gen_args[3] = args[5];
-                gen_args += 4;
+                op->opc = INDEX_op_brcond_i32;
+                args[0] = args[1];
+                args[1] = args[3];
+                args[2] = args[4];
+                args[3] = args[5];
             } else if (args[4] == TCG_COND_EQ) {
                 /* Simplify EQ comparisons where one of the pairs
                    can be simplified.  */
@@ -1246,12 +1225,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 }
             do_brcond_low:
                 reset_all_temps(nb_temps);
-                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
-                gen_args[0] = args[0];
-                gen_args[1] = args[2];
-                gen_args[2] = args[4];
-                gen_args[3] = args[5];
-                gen_args += 4;
+                op->opc = INDEX_op_brcond_i32;
+                args[1] = args[2];
+                args[2] = args[4];
+                args[3] = args[5];
             } else if (args[4] == TCG_COND_NE) {
                 /* Simplify NE comparisons where one of the pairs
                    can be simplified.  */
@@ -1273,15 +1250,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             } else {
                 goto do_default;
             }
-            args += 6;
             break;
 
         case INDEX_op_setcond2_i32:
             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
             if (tmp != 2) {
             do_setcond_const:
-                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
-                gen_args += 2;
+                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                        && temps[args[3]].state == TCG_TEMP_CONST
                        && temps[args[4]].state == TCG_TEMP_CONST
@@ -1290,14 +1265,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
             do_setcond_high:
-                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                 reset_temp(args[0]);
                 temps[args[0]].mask = 1;
-                gen_args[0] = args[0];
-                gen_args[1] = args[2];
-                gen_args[2] = args[4];
-                gen_args[3] = args[5];
-                gen_args += 4;
+                op->opc = INDEX_op_setcond_i32;
+                args[1] = args[2];
+                args[2] = args[4];
+                args[3] = args[5];
             } else if (args[5] == TCG_COND_EQ) {
                 /* Simplify EQ comparisons where one of the pairs
                    can be simplified.  */
@@ -1318,12 +1291,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             do_setcond_low:
                 reset_temp(args[0]);
                 temps[args[0]].mask = 1;
-                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[3];
-                gen_args[3] = args[5];
-                gen_args += 4;
+                op->opc = INDEX_op_setcond_i32;
+                args[2] = args[3];
+                args[3] = args[5];
             } else if (args[5] == TCG_COND_NE) {
                 /* Simplify NE comparisons where one of the pairs
                    can be simplified.  */
@@ -1345,7 +1315,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             } else {
                 goto do_default;
             }
-            args += 6;
             break;
 
         case INDEX_op_call:
@@ -1377,22 +1346,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     }
                 }
             }
-            for (i = 0; i < nb_args; i++) {
-                gen_args[i] = args[i];
-            }
-            args += nb_args;
-            gen_args += nb_args;
             break;
         }
     }
-
-    return gen_args;
 }
 
-TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
-        TCGArg *args, TCGOpDef *tcg_op_defs)
+void tcg_optimize(TCGContext *s)  /* runs the constant-folding pass on S */
 {
-    TCGArg *res;
-    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
-    return res;
+    tcg_constant_folding(s);
 }

+ 1934 - 0
tcg/tcg-op.c

@@ -0,0 +1,1934 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "tcg.h"
+#include "tcg-op.h"
+
+/* Reduce the number of ifdefs below.  This assumes that all uses of
+   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
+   the compiler can eliminate.  */
+#if TCG_TARGET_REG_BITS == 64
+extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
+extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
+#define TCGV_LOW  TCGV_LOW_link_error
+#define TCGV_HIGH TCGV_HIGH_link_error
+#endif
+
+/* Append one op to the op buffer.
+
+   OPC is the opcode and ARGS is the index of the op's first parameter in
+   gen_opparam_buf.  Allocation is strictly sequential during translation,
+   so the new entry is linked by index to the slot before it (prev) and to
+   the not-yet-filled slot after it (next).  Proper termination of the end
+   of the list is done after translation has finished.  */
+
+static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
+{
+    int cur = ctx->gen_next_op_idx;
+    TCGOp op = {
+        .opc = opc,
+        .args = args,
+        .prev = cur - 1,
+        .next = cur + 1
+    };
+
+    tcg_debug_assert(cur < OPC_BUF_SIZE);
+    ctx->gen_last_op_idx = cur;
+    ctx->gen_next_op_idx = cur + 1;
+
+    ctx->gen_op_buf[cur] = op;
+}
+
+/* Emit OPC with one parameter: reserve one slot in the parameter buffer,
+   store the parameter there, then append the op referring to that slot.  */
+void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1)
+{
+    int base = ctx->gen_next_parm_idx;
+    TCGArg *slot;
+
+    tcg_debug_assert(base + 1 <= OPPARAM_BUF_SIZE);
+    ctx->gen_next_parm_idx = base + 1;
+
+    slot = &ctx->gen_opparam_buf[base];
+    slot[0] = a1;
+
+    tcg_emit_op(ctx, opc, base);
+}
+
+/* Emit OPC with two parameters: reserve two consecutive slots in the
+   parameter buffer, fill them in order, then append the op.  */
+void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2)
+{
+    int base = ctx->gen_next_parm_idx;
+    TCGArg *slot;
+
+    tcg_debug_assert(base + 2 <= OPPARAM_BUF_SIZE);
+    ctx->gen_next_parm_idx = base + 2;
+
+    slot = &ctx->gen_opparam_buf[base];
+    slot[0] = a1;
+    slot[1] = a2;
+
+    tcg_emit_op(ctx, opc, base);
+}
+
+/* Emit OPC with three parameters: reserve three consecutive slots in the
+   parameter buffer, fill them in order, then append the op.  */
+void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
+                 TCGArg a2, TCGArg a3)
+{
+    int base = ctx->gen_next_parm_idx;
+    TCGArg *slot;
+
+    tcg_debug_assert(base + 3 <= OPPARAM_BUF_SIZE);
+    ctx->gen_next_parm_idx = base + 3;
+
+    slot = &ctx->gen_opparam_buf[base];
+    slot[0] = a1;
+    slot[1] = a2;
+    slot[2] = a3;
+
+    tcg_emit_op(ctx, opc, base);
+}
+
+/* Emit OPC with four parameters: reserve four consecutive slots in the
+   parameter buffer, fill them in order, then append the op.  */
+void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
+                 TCGArg a2, TCGArg a3, TCGArg a4)
+{
+    int base = ctx->gen_next_parm_idx;
+    TCGArg *slot;
+
+    tcg_debug_assert(base + 4 <= OPPARAM_BUF_SIZE);
+    ctx->gen_next_parm_idx = base + 4;
+
+    slot = &ctx->gen_opparam_buf[base];
+    slot[0] = a1;
+    slot[1] = a2;
+    slot[2] = a3;
+    slot[3] = a4;
+
+    tcg_emit_op(ctx, opc, base);
+}
+
+/* Emit OPC with five parameters: reserve five consecutive slots in the
+   parameter buffer, fill them in order, then append the op.  */
+void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
+                 TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5)
+{
+    int base = ctx->gen_next_parm_idx;
+    TCGArg *slot;
+
+    tcg_debug_assert(base + 5 <= OPPARAM_BUF_SIZE);
+    ctx->gen_next_parm_idx = base + 5;
+
+    slot = &ctx->gen_opparam_buf[base];
+    slot[0] = a1;
+    slot[1] = a2;
+    slot[2] = a3;
+    slot[3] = a4;
+    slot[4] = a5;
+
+    tcg_emit_op(ctx, opc, base);
+}
+
+/* Emit OPC with six parameters: reserve six consecutive slots in the
+   parameter buffer, fill them in order, then append the op.  */
+void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
+                 TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
+{
+    int base = ctx->gen_next_parm_idx;
+    TCGArg *slot;
+
+    tcg_debug_assert(base + 6 <= OPPARAM_BUF_SIZE);
+    ctx->gen_next_parm_idx = base + 6;
+
+    slot = &ctx->gen_opparam_buf[base];
+    slot[0] = a1;
+    slot[1] = a2;
+    slot[2] = a3;
+    slot[3] = a4;
+    slot[4] = a5;
+    slot[5] = a6;
+
+    tcg_emit_op(ctx, opc, base);
+}
+
+/* 32 bit ops */
+
+/* Emit ret = arg1 + arg2 for an immediate ARG2.  */
+void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+    TCGv_i32 imm;
+
+    if (arg2 == 0) {
+        /* Adding zero is just a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i32(arg2);
+    tcg_gen_add_i32(ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 - arg2 where the minuend ARG1 is an immediate.  */
+void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
+{
+    TCGv_i32 imm;
+
+    if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
+        /* 0 - x is a negation.  Emit the op directly instead of calling
+           tcg_gen_neg_i32, so that we don't recurse.  */
+        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
+        return;
+    }
+
+    imm = tcg_const_i32(arg1);
+    tcg_gen_sub_i32(ret, imm, arg2);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 - arg2 for an immediate ARG2.  */
+void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+    TCGv_i32 imm;
+
+    if (arg2 == 0) {
+        /* Subtracting zero is just a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i32(arg2);
+    tcg_gen_sub_i32(ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 & arg2 for an immediate mask ARG2, using a cheaper
+   form for a few well-known masks.  */
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+{
+    TCGv_i32 imm;
+
+    if (arg2 == 0) {
+        /* Masking with zero always yields zero.  */
+        tcg_gen_movi_i32(ret, 0);
+        return;
+    }
+    if (arg2 == 0xffffffffu) {
+        /* Masking with all ones is a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+    if (arg2 == 0xffu && TCG_TARGET_HAS_ext8u_i32) {
+        /* Emit the op directly; tcg_gen_ext8u_i32 would recurse.  */
+        tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+        return;
+    }
+    if (arg2 == 0xffffu && TCG_TARGET_HAS_ext16u_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+        return;
+    }
+
+    /* General case: materialize the mask and emit a plain AND.  */
+    imm = tcg_const_i32(arg2);
+    tcg_gen_and_i32(ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 | arg2 for an immediate ARG2.  */
+void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+    TCGv_i32 imm;
+
+    if (arg2 == -1) {
+        /* OR with all ones is all ones, whatever arg1 holds.  */
+        tcg_gen_movi_i32(ret, -1);
+        return;
+    }
+    if (arg2 == 0) {
+        /* OR with zero is a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i32(arg2);
+    tcg_gen_or_i32(ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 ^ arg2 for an immediate ARG2.  */
+void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+    TCGv_i32 imm;
+
+    if (arg2 == 0) {
+        /* XOR with zero is a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+    if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+        /* XOR with all ones is a bitwise NOT.  Emit the op directly;
+           tcg_gen_not_i32 would recurse.  */
+        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i32(arg2);
+    tcg_gen_xor_i32(ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 << arg2 for an immediate count ARG2, which must be
+   less than 32.  */
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+    TCGv_i32 cnt;
+
+    tcg_debug_assert(arg2 < 32);
+    if (arg2 == 0) {
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    cnt = tcg_const_i32(arg2);
+    tcg_gen_shl_i32(ret, arg1, cnt);
+    tcg_temp_free_i32(cnt);
+}
+
+/* Emit a logical right shift ret = arg1 >> arg2 for an immediate count
+   ARG2, which must be less than 32.  */
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+    TCGv_i32 cnt;
+
+    tcg_debug_assert(arg2 < 32);
+    if (arg2 == 0) {
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    cnt = tcg_const_i32(arg2);
+    tcg_gen_shr_i32(ret, arg1, cnt);
+    tcg_temp_free_i32(cnt);
+}
+
+/* Emit an arithmetic right shift ret = arg1 >> arg2 for an immediate
+   count ARG2, which must be less than 32.  */
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+    TCGv_i32 cnt;
+
+    tcg_debug_assert(arg2 < 32);
+    if (arg2 == 0) {
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    cnt = tcg_const_i32(arg2);
+    tcg_gen_sar_i32(ret, arg1, cnt);
+    tcg_temp_free_i32(cnt);
+}
+
+/* Emit a branch to LABEL taken when "arg1 COND arg2" holds.  */
+void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, int label)
+{
+    if (cond == TCG_COND_NEVER) {
+        /* Never taken: nothing to emit.  */
+        return;
+    }
+    if (cond == TCG_COND_ALWAYS) {
+        /* Always taken: an unconditional branch suffices.  */
+        tcg_gen_br(label);
+        return;
+    }
+    tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label);
+}
+
+/* Emit a branch to LABEL taken when "arg1 COND arg2" holds, for an
+   immediate ARG2.
+
+   The trivial conditions are handled before the constant is
+   materialized, the same way tcg_gen_brcondi_i64 does it, so that no
+   temporary is allocated and no dead constant load is emitted when the
+   comparison is not actually needed.  */
+void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, int label)
+{
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_brcond_i32(cond, arg1, t0, label);
+        tcg_temp_free_i32(t0);
+    }
+}
+
+/* Emit ret = (arg1 COND arg2) ? 1 : 0.  */
+void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
+                         TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    switch (cond) {
+    case TCG_COND_ALWAYS:
+        /* Result is known at translation time.  */
+        tcg_gen_movi_i32(ret, 1);
+        break;
+    case TCG_COND_NEVER:
+        tcg_gen_movi_i32(ret, 0);
+        break;
+    default:
+        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+        break;
+    }
+}
+
+/* Emit ret = (arg1 COND arg2) ? 1 : 0 for an immediate ARG2, by loading
+   the immediate into a temporary and deferring to tcg_gen_setcond_i32.  */
+void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+                          TCGv_i32 arg1, int32_t arg2)
+{
+    TCGv_i32 imm;
+
+    imm = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit ret = arg1 * arg2 for an immediate ARG2.  */
+void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+    TCGv_i32 imm;
+
+    imm = tcg_const_i32(arg2);
+    tcg_gen_mul_i32(ret, arg1, imm);
+    tcg_temp_free_i32(imm);
+}
+
+/* Emit a signed division ret = arg1 / arg2, using whichever of the div
+   op, the div2 op or the helper function is available.  */
+void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_div_i32) {
+        tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        /* div2 takes a two-word dividend; the extra word is arg1's sign
+           replicated, and is also reused as the unwanted second output.  */
+        TCGv_i32 sign = tcg_temp_new_i32();
+
+        tcg_gen_sari_i32(sign, arg1, 31);
+        tcg_gen_op5_i32(INDEX_op_div2_i32, ret, sign, arg1, sign, arg2);
+        tcg_temp_free_i32(sign);
+    } else {
+        gen_helper_div_i32(ret, arg1, arg2);
+    }
+}
+
+/* Emit a signed remainder ret = arg1 % arg2, using whichever of the rem
+   op, the div op, the div2 op or the helper function is available.  */
+void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_rem_i32) {
+        tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div_i32) {
+        /* rem = arg1 - (arg1 / arg2) * arg2 */
+        TCGv_i32 quot = tcg_temp_new_i32();
+
+        tcg_gen_op3_i32(INDEX_op_div_i32, quot, arg1, arg2);
+        tcg_gen_mul_i32(quot, quot, arg2);
+        tcg_gen_sub_i32(ret, arg1, quot);
+        tcg_temp_free_i32(quot);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        /* Same div2 sequence as tcg_gen_div_i32 but with the two outputs
+           swapped, so RET receives the second output.  */
+        TCGv_i32 sign = tcg_temp_new_i32();
+
+        tcg_gen_sari_i32(sign, arg1, 31);
+        tcg_gen_op5_i32(INDEX_op_div2_i32, sign, ret, arg1, sign, arg2);
+        tcg_temp_free_i32(sign);
+    } else {
+        gen_helper_rem_i32(ret, arg1, arg2);
+    }
+}
+
+/* Emit an unsigned division ret = arg1 / arg2, using whichever of the
+   divu op, the divu2 op or the helper function is available.  */
+void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_div_i32) {
+        tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        /* divu2 takes a two-word dividend; the extra word is zero, and
+           is also reused as the unwanted second output.  */
+        TCGv_i32 zero = tcg_temp_new_i32();
+
+        tcg_gen_movi_i32(zero, 0);
+        tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, zero, arg1, zero, arg2);
+        tcg_temp_free_i32(zero);
+    } else {
+        gen_helper_divu_i32(ret, arg1, arg2);
+    }
+}
+
+/* Emit an unsigned remainder ret = arg1 % arg2, using whichever of the
+   remu op, the divu op, the divu2 op or the helper function is
+   available.  */
+void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_rem_i32) {
+        tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
+    } else if (TCG_TARGET_HAS_div_i32) {
+        /* rem = arg1 - (arg1 / arg2) * arg2 */
+        TCGv_i32 quot = tcg_temp_new_i32();
+
+        tcg_gen_op3_i32(INDEX_op_divu_i32, quot, arg1, arg2);
+        tcg_gen_mul_i32(quot, quot, arg2);
+        tcg_gen_sub_i32(ret, arg1, quot);
+        tcg_temp_free_i32(quot);
+    } else if (TCG_TARGET_HAS_div2_i32) {
+        /* Same divu2 sequence as tcg_gen_divu_i32 but with the two
+           outputs swapped, so RET receives the second output.  */
+        TCGv_i32 zero = tcg_temp_new_i32();
+
+        tcg_gen_movi_i32(zero, 0);
+        tcg_gen_op5_i32(INDEX_op_divu2_i32, zero, ret, arg1, zero, arg2);
+        tcg_temp_free_i32(zero);
+    } else {
+        gen_helper_remu_i32(ret, arg1, arg2);
+    }
+}
+
+/* Emit ret = arg1 & ~arg2.  */
+void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    TCGv_i32 inv;
+
+    if (TCG_TARGET_HAS_andc_i32) {
+        tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: complement arg2 into a temporary first.  */
+    inv = tcg_temp_new_i32();
+    tcg_gen_not_i32(inv, arg2);
+    tcg_gen_and_i32(ret, arg1, inv);
+    tcg_temp_free_i32(inv);
+}
+
+/* Emit ret = ~(arg1 ^ arg2).  */
+void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_eqv_i32) {
+        tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: XOR, then complement the result in place.  */
+    tcg_gen_xor_i32(ret, arg1, arg2);
+    tcg_gen_not_i32(ret, ret);
+}
+
+/* Emit ret = ~(arg1 & arg2).  */
+void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_nand_i32) {
+        tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: AND, then complement the result in place.  */
+    tcg_gen_and_i32(ret, arg1, arg2);
+    tcg_gen_not_i32(ret, ret);
+}
+
+/* Emit ret = ~(arg1 | arg2).  */
+void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_nor_i32) {
+        tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: OR, then complement the result in place.  */
+    tcg_gen_or_i32(ret, arg1, arg2);
+    tcg_gen_not_i32(ret, ret);
+}
+
+/* Emit ret = arg1 | ~arg2.  */
+void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    TCGv_i32 inv;
+
+    if (TCG_TARGET_HAS_orc_i32) {
+        tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: complement arg2 into a temporary first.  */
+    inv = tcg_temp_new_i32();
+    tcg_gen_not_i32(inv, arg2);
+    tcg_gen_or_i32(ret, arg1, inv);
+    tcg_temp_free_i32(inv);
+}
+
+/* Emit a rotate left of ARG1 by the variable count ARG2 into RET.  */
+void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    TCGv_i32 hi, lo;
+
+    if (TCG_TARGET_HAS_rot_i32) {
+        tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: (arg1 << arg2) | (arg1 >> (32 - arg2)).  */
+    hi = tcg_temp_new_i32();
+    lo = tcg_temp_new_i32();
+    tcg_gen_shl_i32(hi, arg1, arg2);
+    tcg_gen_subfi_i32(lo, 32, arg2);
+    tcg_gen_shr_i32(lo, arg1, lo);
+    tcg_gen_or_i32(ret, hi, lo);
+    tcg_temp_free_i32(hi);
+    tcg_temp_free_i32(lo);
+}
+
+/* Emit a rotate left of ARG1 by the immediate count ARG2 into RET.
+   ARG2 must be less than 32.  */
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+    tcg_debug_assert(arg2 < 32);
+
+    if (arg2 == 0) {
+        /* Rotating by zero is a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+    } else if (TCG_TARGET_HAS_rot_i32) {
+        TCGv_i32 cnt = tcg_const_i32(arg2);
+
+        tcg_gen_rotl_i32(ret, arg1, cnt);
+        tcg_temp_free_i32(cnt);
+    } else {
+        /* No native op: (arg1 << arg2) | (arg1 >> (32 - arg2)).  */
+        TCGv_i32 hi = tcg_temp_new_i32();
+        TCGv_i32 lo = tcg_temp_new_i32();
+
+        tcg_gen_shli_i32(hi, arg1, arg2);
+        tcg_gen_shri_i32(lo, arg1, 32 - arg2);
+        tcg_gen_or_i32(ret, hi, lo);
+        tcg_temp_free_i32(hi);
+        tcg_temp_free_i32(lo);
+    }
+}
+
+/* Emit a rotate right of ARG1 by the variable count ARG2 into RET.  */
+void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    TCGv_i32 lo, hi;
+
+    if (TCG_TARGET_HAS_rot_i32) {
+        tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
+        return;
+    }
+
+    /* No native op: (arg1 >> arg2) | (arg1 << (32 - arg2)).  */
+    lo = tcg_temp_new_i32();
+    hi = tcg_temp_new_i32();
+    tcg_gen_shr_i32(lo, arg1, arg2);
+    tcg_gen_subfi_i32(hi, 32, arg2);
+    tcg_gen_shl_i32(hi, arg1, hi);
+    tcg_gen_or_i32(ret, lo, hi);
+    tcg_temp_free_i32(lo);
+    tcg_temp_free_i32(hi);
+}
+
+/* Emit a rotate right of ARG1 by the immediate count ARG2 into RET.
+   ARG2 must be less than 32.  */
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+{
+    tcg_debug_assert(arg2 < 32);
+
+    if (arg2 == 0) {
+        /* Rotating by zero is a move.  */
+        tcg_gen_mov_i32(ret, arg1);
+        return;
+    }
+
+    /* A right rotate by n is a left rotate by 32 - n.  */
+    tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
+}
+
+/* Emit ret = arg1 with the LEN-bit field starting at bit OFS replaced by
+   the low LEN bits of arg2.  The field must lie within 32 bits.  */
+void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
+                         unsigned int ofs, unsigned int len)
+{
+    uint32_t field_mask;
+    TCGv_i32 field;
+
+    tcg_debug_assert(ofs < 32);
+    tcg_debug_assert(len <= 32);
+    tcg_debug_assert(ofs + len <= 32);
+
+    if (ofs == 0 && len == 32) {
+        /* The field covers the whole word: plain move of arg2.  */
+        tcg_gen_mov_i32(ret, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
+        return;
+    }
+
+    /* Generic expansion with shifts and masks.  */
+    field_mask = (1u << len) - 1;
+    field = tcg_temp_new_i32();
+
+    if (ofs + len < 32) {
+        tcg_gen_andi_i32(field, arg2, field_mask);
+        tcg_gen_shli_i32(field, field, ofs);
+    } else {
+        /* The field reaches bit 31, so the shift itself drops the
+           excess high bits of arg2 and no masking is needed.  */
+        tcg_gen_shli_i32(field, arg2, ofs);
+    }
+    tcg_gen_andi_i32(ret, arg1, ~(field_mask << ofs));
+    tcg_gen_or_i32(ret, ret, field);
+
+    tcg_temp_free_i32(field);
+}
+
+/* Emit ret = (c1 COND c2) ? v1 : v2.  */
+void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
+                         TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
+{
+    TCGv_i32 mask, picked;
+
+    if (TCG_TARGET_HAS_movcond_i32) {
+        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+        return;
+    }
+
+    /* No native op: negate the 0/1 setcond result to get an all-zeros or
+       all-ones mask, then ret = (v1 & mask) | (v2 & ~mask).  */
+    mask = tcg_temp_new_i32();
+    picked = tcg_temp_new_i32();
+    tcg_gen_setcond_i32(cond, mask, c1, c2);
+    tcg_gen_neg_i32(mask, mask);
+    tcg_gen_and_i32(picked, v1, mask);
+    tcg_gen_andc_i32(ret, v2, mask);
+    tcg_gen_or_i32(ret, ret, picked);
+    tcg_temp_free_i32(mask);
+    tcg_temp_free_i32(picked);
+}
+
+/* Emit a double-word addition: (rh:rl) = (ah:al) + (bh:bl).  */
+void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+    TCGv_i64 a, b;
+
+    if (TCG_TARGET_HAS_add2_i32) {
+        tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
+        return;
+    }
+
+    /* No native op: glue the halves into 64-bit temporaries, add, and
+       split the sum back into its two halves.  */
+    a = tcg_temp_new_i64();
+    b = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(a, al, ah);
+    tcg_gen_concat_i32_i64(b, bl, bh);
+    tcg_gen_add_i64(a, a, b);
+    tcg_gen_extr_i64_i32(rl, rh, a);
+    tcg_temp_free_i64(a);
+    tcg_temp_free_i64(b);
+}
+
+/* Emit a double-word subtraction: (rh:rl) = (ah:al) - (bh:bl).  */
+void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+    TCGv_i64 a, b;
+
+    if (TCG_TARGET_HAS_sub2_i32) {
+        tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
+        return;
+    }
+
+    /* No native op: glue the halves into 64-bit temporaries, subtract,
+       and split the difference back into its two halves.  */
+    a = tcg_temp_new_i64();
+    b = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(a, al, ah);
+    tcg_gen_concat_i32_i64(b, bl, bh);
+    tcg_gen_sub_i64(a, a, b);
+    tcg_gen_extr_i64_i32(rl, rh, a);
+    tcg_temp_free_i64(a);
+    tcg_temp_free_i64(b);
+}
+
+/* Emit an unsigned widening multiply: (rh:rl) = arg1 * arg2.  */
+void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_mulu2_i32) {
+        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
+    } else if (TCG_TARGET_HAS_muluh_i32) {
+        /* The low product goes through a temporary so that writing the
+           result cannot clobber an input still needed by muluh.  */
+        TCGv_i32 low = tcg_temp_new_i32();
+
+        tcg_gen_op3_i32(INDEX_op_mul_i32, low, arg1, arg2);
+        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
+        tcg_gen_mov_i32(rl, low);
+        tcg_temp_free_i32(low);
+    } else {
+        /* Zero-extend both inputs and do a 64-bit multiply.  */
+        TCGv_i64 a = tcg_temp_new_i64();
+        TCGv_i64 b = tcg_temp_new_i64();
+
+        tcg_gen_extu_i32_i64(a, arg1);
+        tcg_gen_extu_i32_i64(b, arg2);
+        tcg_gen_mul_i64(a, a, b);
+        tcg_gen_extr_i64_i32(rl, rh, a);
+        tcg_temp_free_i64(a);
+        tcg_temp_free_i64(b);
+    }
+}
+
+/* Emit a signed widening multiply: (rh:rl) = arg1 * arg2.  */
+void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    if (TCG_TARGET_HAS_muls2_i32) {
+        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+    } else if (TCG_TARGET_HAS_mulsh_i32) {
+        /* The low product goes through a temporary so that writing the
+           result cannot clobber an input still needed by mulsh.  */
+        TCGv_i32 low = tcg_temp_new_i32();
+
+        tcg_gen_op3_i32(INDEX_op_mul_i32, low, arg1, arg2);
+        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
+        tcg_gen_mov_i32(rl, low);
+        tcg_temp_free_i32(low);
+    } else if (TCG_TARGET_REG_BITS == 32) {
+        /* Start from the unsigned product and correct the high word.  */
+        TCGv_i32 ulo = tcg_temp_new_i32();
+        TCGv_i32 uhi = tcg_temp_new_i32();
+        TCGv_i32 adj1 = tcg_temp_new_i32();
+        TCGv_i32 adj2 = tcg_temp_new_i32();
+
+        tcg_gen_mulu2_i32(ulo, uhi, arg1, arg2);
+        /* For each negative input, subtract the other input from the
+           high word: adj1 = arg1 < 0 ? arg2 : 0, adj2 likewise.  */
+        tcg_gen_sari_i32(adj1, arg1, 31);
+        tcg_gen_sari_i32(adj2, arg2, 31);
+        tcg_gen_and_i32(adj1, adj1, arg2);
+        tcg_gen_and_i32(adj2, adj2, arg1);
+        tcg_gen_sub_i32(rh, uhi, adj1);
+        tcg_gen_sub_i32(rh, rh, adj2);
+        tcg_gen_mov_i32(rl, ulo);
+        tcg_temp_free_i32(ulo);
+        tcg_temp_free_i32(uhi);
+        tcg_temp_free_i32(adj1);
+        tcg_temp_free_i32(adj2);
+    } else {
+        /* Sign-extend both inputs and do a 64-bit multiply.  */
+        TCGv_i64 a = tcg_temp_new_i64();
+        TCGv_i64 b = tcg_temp_new_i64();
+
+        tcg_gen_ext_i32_i64(a, arg1);
+        tcg_gen_ext_i32_i64(b, arg2);
+        tcg_gen_mul_i64(a, a, b);
+        tcg_gen_extr_i64_i32(rl, rh, a);
+        tcg_temp_free_i64(a);
+        tcg_temp_free_i64(b);
+    }
+}
+
+/* Emit ret = low 8 bits of arg, sign-extended to 32 bits.  */
+void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_HAS_ext8s_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
+        return;
+    }
+
+    /* No native op: shift the byte to the top, then arithmetic shift
+       back down to replicate its sign bit.  */
+    tcg_gen_shli_i32(ret, arg, 24);
+    tcg_gen_sari_i32(ret, ret, 24);
+}
+
+/* Emit ret = low 16 bits of arg, sign-extended to 32 bits.  */
+void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_HAS_ext16s_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
+        return;
+    }
+
+    /* No native op: shift the halfword to the top, then arithmetic
+       shift back down to replicate its sign bit.  */
+    tcg_gen_shli_i32(ret, arg, 16);
+    tcg_gen_sari_i32(ret, ret, 16);
+}
+
+/* Emit ret = low 8 bits of arg, zero-extended to 32 bits.  */
+void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_HAS_ext8u_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
+        return;
+    }
+
+    /* No native op: mask off everything above the low byte.  */
+    tcg_gen_andi_i32(ret, arg, 0xffu);
+}
+
+/* Emit ret = low 16 bits of arg, zero-extended to 32 bits.  */
+void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_HAS_ext16u_i32) {
+        tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
+        return;
+    }
+
+    /* No native op: mask off everything above the low halfword.  */
+    tcg_gen_andi_i32(ret, arg, 0xffffu);
+}
+
+/* Emit a swap of the two low bytes of ARG into RET.
+   Note: the two high bytes of ARG are assumed to be zero.  */
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    TCGv_i32 low_byte;
+
+    if (TCG_TARGET_HAS_bswap16_i32) {
+        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
+        return;
+    }
+
+    /* No native op: (byte0 << 8) | (arg >> 8).  */
+    low_byte = tcg_temp_new_i32();
+
+    tcg_gen_ext8u_i32(low_byte, arg);
+    tcg_gen_shli_i32(low_byte, low_byte, 8);
+    tcg_gen_shri_i32(ret, arg, 8);
+    tcg_gen_or_i32(ret, ret, low_byte);
+    tcg_temp_free_i32(low_byte);
+}
+
+/* Emit a reversal of the four bytes of ARG into RET.  */
+void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+    TCGv_i32 acc, part;
+
+    if (TCG_TARGET_HAS_bswap32_i32) {
+        tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
+        return;
+    }
+
+    /* No native op: move each byte to its mirrored position and OR the
+       pieces together.  */
+    acc = tcg_temp_new_i32();
+    part = tcg_temp_new_i32();
+
+    /* Byte 0 -> byte 3.  */
+    tcg_gen_shli_i32(acc, arg, 24);
+
+    /* Byte 1 -> byte 2.  */
+    tcg_gen_andi_i32(part, arg, 0x0000ff00);
+    tcg_gen_shli_i32(part, part, 8);
+    tcg_gen_or_i32(acc, acc, part);
+
+    /* Byte 2 -> byte 1.  */
+    tcg_gen_shri_i32(part, arg, 8);
+    tcg_gen_andi_i32(part, part, 0x0000ff00);
+    tcg_gen_or_i32(acc, acc, part);
+
+    /* Byte 3 -> byte 0.  */
+    tcg_gen_shri_i32(part, arg, 24);
+    tcg_gen_or_i32(ret, acc, part);
+    tcg_temp_free_i32(acc);
+    tcg_temp_free_i32(part);
+}
+
+/* 64-bit ops */
+
+#if TCG_TARGET_REG_BITS == 32
+/* These are all inline for TCG_TARGET_REG_BITS == 64.  */
+
+/* 32-bit host: discard a 64-bit value by discarding each of its two
+   32-bit halves.  */
+void tcg_gen_discard_i64(TCGv_i64 arg)
+{
+    tcg_gen_discard_i32(TCGV_LOW(arg));
+    tcg_gen_discard_i32(TCGV_HIGH(arg));
+}
+
+/* 32-bit host: copy a 64-bit value as two 32-bit moves, low half
+   first.  */
+void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+    tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+}
+
+/* 32-bit host: load a 64-bit immediate as two 32-bit immediates.  The
+   low half receives ARG truncated to 32 bits, the high half receives
+   ARG >> 32.  */
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+    tcg_gen_movi_i32(TCGV_LOW(ret), arg);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
+}
+
+/* 32-bit host: load an unsigned byte from ARG2 + OFFSET into the low
+   half of RET and clear the high half.  */
+void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+/* 32-bit host: load a signed byte from ARG2 + OFFSET into the low half
+   of RET and sign-extend it into the high half.
+
+   The high half must replicate the sign bit of the freshly loaded low
+   half, exactly as tcg_gen_ld16s_i64 and tcg_gen_ld32s_i64 do.  Shifting
+   the previous contents of the high half instead would produce a result
+   that depends on stale data.  */
+void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+/* 32-bit host: load an unsigned 16-bit value from ARG2 + OFFSET into the
+   low half of RET and clear the high half.  */
+void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+/* 32-bit host: load a signed 16-bit value from ARG2 + OFFSET into the
+   low half of RET, then fill the high half with copies of the low half's
+   sign bit.  */
+void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+/* 32-bit host: load a 32-bit value from ARG2 + OFFSET into the low half
+   of RET and clear the high half.  */
+void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+/* 32-bit host: load a 32-bit value from ARG2 + OFFSET into the low half
+   of RET, then fill the high half with copies of the low half's sign
+   bit.  */
+void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+/* 32-bit host: load a 64-bit value from ARG2 + OFFSET as two 32-bit
+   loads.  Which half lives at the lower address depends on
+   TCG_TARGET_WORDS_BIGENDIAN.  */
+void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+    /* Since arg2 and ret have different types,
+       they cannot be the same temporary */
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+    /* Big-endian host: high half first, low half 4 bytes later.  */
+    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
+    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
+#else
+    /* Little-endian host: low half first, high half 4 bytes later.  */
+    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
+#endif
+}
+
+/* 32-bit host: store the 64-bit value ARG1 to ARG2 + OFFSET as two
+   32-bit stores.  Which half goes to the lower address depends on
+   TCG_TARGET_WORDS_BIGENDIAN.  */
+void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+#ifdef TCG_TARGET_WORDS_BIGENDIAN
+    /* Big-endian host: high half first, low half 4 bytes later.  */
+    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
+    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
+#else
+    /* Little-endian host: low half first, high half 4 bytes later.  */
+    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
+#endif
+}
+
+/* 32-bit host: 64-bit AND, done independently on the low and high
+   halves.  */
+void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+    tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+/* 32-bit host: 64-bit OR, done independently on the low and high
+   halves.  */
+void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+    tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+/* 32-bit host: 64-bit XOR, done independently on the low and high
+   halves.  */
+void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+    tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+/* 32-bit host: 64-bit left shift by a variable count, delegated to the
+   shl_i64 helper.  */
+void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    gen_helper_shl_i64(ret, arg1, arg2);
+}
+
+/* 32-bit host: 64-bit right shift by a variable count, delegated to the
+   shr_i64 helper.  */
+void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    gen_helper_shr_i64(ret, arg1, arg2);
+}
+
+/* 32-bit host: 64-bit arithmetic right shift by a variable count,
+   delegated to the sar_i64 helper.  */
+void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    gen_helper_sar_i64(ret, arg1, arg2);
+}
+
+/* 32-bit host: 64-bit multiply built from 32-bit pieces.  The result is
+   accumulated in a temporary and only copied to RET at the end.  */
+void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 prod = tcg_temp_new_i64();
+    TCGv_i32 cross = tcg_temp_new_i32();
+
+    /* Full 64-bit product of the two low halves.  */
+    tcg_gen_mulu2_i32(TCGV_LOW(prod), TCGV_HIGH(prod),
+                      TCGV_LOW(arg1), TCGV_LOW(arg2));
+
+    /* Add both cross products into the high half.  */
+    tcg_gen_mul_i32(cross, TCGV_LOW(arg1), TCGV_HIGH(arg2));
+    tcg_gen_add_i32(TCGV_HIGH(prod), TCGV_HIGH(prod), cross);
+    tcg_gen_mul_i32(cross, TCGV_HIGH(arg1), TCGV_LOW(arg2));
+    tcg_gen_add_i32(TCGV_HIGH(prod), TCGV_HIGH(prod), cross);
+
+    tcg_gen_mov_i64(ret, prod);
+    tcg_temp_free_i64(prod);
+    tcg_temp_free_i32(cross);
+}
+#endif /* TCG_TARGET_REG_BITS == 32 */
+
+/* Emit ret = arg1 + arg2 for a 64-bit immediate ARG2.  */
+void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+    TCGv_i64 imm;
+
+    if (arg2 == 0) {
+        /* Adding zero is just a move.  */
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i64(arg2);
+    tcg_gen_add_i64(ret, arg1, imm);
+    tcg_temp_free_i64(imm);
+}
+
+/* Emit ret = arg1 - arg2 where the minuend ARG1 is a 64-bit immediate.  */
+void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 imm;
+
+    if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
+        /* 0 - x is a negation.  Emit the op directly instead of calling
+           tcg_gen_neg_i64, so that we don't recurse.  */
+        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
+        return;
+    }
+
+    imm = tcg_const_i64(arg1);
+    tcg_gen_sub_i64(ret, imm, arg2);
+    tcg_temp_free_i64(imm);
+}
+
+/* Emit ret = arg1 - arg2 for a 64-bit immediate ARG2.  */
+void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+    TCGv_i64 imm;
+
+    if (arg2 == 0) {
+        /* Subtracting zero is just a move.  */
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i64(arg2);
+    tcg_gen_sub_i64(ret, arg1, imm);
+    tcg_temp_free_i64(imm);
+}
+
+/* Emit ret = arg1 & arg2 for a 64-bit immediate mask ARG2, using a
+   cheaper form for a few well-known masks.  */
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+{
+    TCGv_i64 imm;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Mask each 32-bit half with the matching half of arg2.  */
+        tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+        tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+        return;
+    }
+
+    if (arg2 == 0) {
+        /* Masking with zero always yields zero.  */
+        tcg_gen_movi_i64(ret, 0);
+        return;
+    }
+    if (arg2 == 0xffffffffffffffffull) {
+        /* Masking with all ones is a move.  */
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+    if (arg2 == 0xffull && TCG_TARGET_HAS_ext8u_i64) {
+        /* Emit the op directly; tcg_gen_ext8u_i64 would recurse.  */
+        tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+        return;
+    }
+    if (arg2 == 0xffffu && TCG_TARGET_HAS_ext16u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+        return;
+    }
+    if (arg2 == 0xffffffffull && TCG_TARGET_HAS_ext32u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+        return;
+    }
+
+    /* General case: materialize the mask and emit a plain AND.  */
+    imm = tcg_const_i64(arg2);
+    tcg_gen_and_i64(ret, arg1, imm);
+    tcg_temp_free_i64(imm);
+}
+
+/* Emit ret = arg1 | arg2 for a 64-bit immediate ARG2.  */
+void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+    TCGv_i64 imm;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* OR each 32-bit half with the matching half of arg2.  */
+        tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+        tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+        return;
+    }
+
+    if (arg2 == -1) {
+        /* OR with all ones is all ones, whatever arg1 holds.  */
+        tcg_gen_movi_i64(ret, -1);
+        return;
+    }
+    if (arg2 == 0) {
+        /* OR with zero is a move.  */
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i64(arg2);
+    tcg_gen_or_i64(ret, arg1, imm);
+    tcg_temp_free_i64(imm);
+}
+
+/* Emit ret = arg1 ^ arg2 for a 64-bit immediate ARG2.  */
+void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+    TCGv_i64 imm;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* XOR each 32-bit half with the matching half of arg2.  */
+        tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+        tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+        return;
+    }
+
+    if (arg2 == 0) {
+        /* XOR with zero is a move.  */
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+    if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+        /* XOR with all ones is a bitwise NOT.  Emit the op directly;
+           tcg_gen_not_i64 would recurse.  */
+        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+        return;
+    }
+
+    imm = tcg_const_i64(arg2);
+    tcg_gen_xor_i64(ret, arg1, imm);
+    tcg_temp_free_i64(imm);
+}
+
+/* Emit a 64-bit shift of ARG1 by the immediate count C (< 64) into RET,
+   operating on the two 32-bit halves of each value.  RIGHT selects a
+   right shift instead of a left shift; ARITH makes a right shift
+   arithmetic (sign-filling) instead of logical.  The callers in this
+   file only invoke it when TCG_TARGET_REG_BITS == 32.  */
+static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
+                                      unsigned c, bool right, bool arith)
+{
+    tcg_debug_assert(c < 64);
+    if (c == 0) {
+        /* Shift by zero: copy both halves unchanged.  */
+        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+    } else if (c >= 32) {
+        /* The shift crosses a whole half: one half of the result comes
+           from the other half of the input shifted by c - 32, and the
+           remaining half is a pure fill (sign bits or zero).  */
+        c -= 32;
+        if (right) {
+            if (arith) {
+                tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+                tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
+            } else {
+                tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+            }
+        } else {
+            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
+            tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+        }
+    } else {
+        /* 0 < c < 32: each half is shifted by c, and the bits that fall
+           out of one half (t0) are ORed into the other.  */
+        TCGv_i32 t0, t1;
+
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+        if (right) {
+            /* t0 = bits moving from the high half down into the low.  */
+            tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
+            if (arith) {
+                tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
+            } else {
+                tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
+            }
+            tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
+            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
+            /* The new high half was kept in t1 until the low half was
+               finished.  */
+            tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
+        } else {
+            /* t0 = bits moving from the low half up into the high.  */
+            tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
+            /* Note: ret can be the same as arg1, so we use t1 */
+            tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
+            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
+            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
+            tcg_gen_mov_i32(TCGV_LOW(ret), t1);
+        }
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
+}
+
+/* Emit ret = arg1 << arg2 for an immediate count ARG2, which must be
+   less than 64.  */
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+    TCGv_i64 cnt;
+
+    tcg_debug_assert(arg2 < 64);
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Left shift, expanded on the two 32-bit halves.  */
+        tcg_gen_shifti_i64(ret, arg1, arg2, false, false);
+        return;
+    }
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+
+    cnt = tcg_const_i64(arg2);
+    tcg_gen_shl_i64(ret, arg1, cnt);
+    tcg_temp_free_i64(cnt);
+}
+
+/* Logically shift ARG1 right by the constant ARG2 (which must be below
+   64) and store the result in RET.  */
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+    TCGv_i64 count;
+
+    tcg_debug_assert(arg2 < 64);
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Expand on the 32-bit halves: right shift, not arithmetic.  */
+        tcg_gen_shifti_i64(ret, arg1, arg2, true, false);
+        return;
+    }
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+
+    count = tcg_const_i64(arg2);
+    tcg_gen_shr_i64(ret, arg1, count);
+    tcg_temp_free_i64(count);
+}
+
+/* Arithmetically shift ARG1 right by the constant ARG2 (which must be
+   below 64) and store the result in RET.  */
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+    TCGv_i64 count;
+
+    tcg_debug_assert(arg2 < 64);
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Expand on the 32-bit halves: right shift, arithmetic.  */
+        tcg_gen_shifti_i64(ret, arg1, arg2, true, true);
+        return;
+    }
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+
+    count = tcg_const_i64(arg2);
+    tcg_gen_sar_i64(ret, arg1, count);
+    tcg_temp_free_i64(count);
+}
+
+/* Emit a branch to LABEL conditional on comparing ARG1 with ARG2 under
+   COND.  TCG_COND_ALWAYS becomes an unconditional branch and
+   TCG_COND_NEVER emits nothing.  */
+void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, int label)
+{
+    if (cond == TCG_COND_NEVER) {
+        return;
+    }
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label);
+        return;
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Hand both operands over as pairs of 32-bit halves.  */
+        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
+                          TCGV_HIGH(arg1), TCGV_LOW(arg2),
+                          TCGV_HIGH(arg2), cond, label);
+    } else {
+        tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label);
+    }
+}
+
+/* As tcg_gen_brcond_i64, with the constant ARG2 as the second operand.
+   No constant temporary is created for TCG_COND_ALWAYS or
+   TCG_COND_NEVER.  */
+void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, int label)
+{
+    TCGv_i64 imm;
+
+    if (cond == TCG_COND_NEVER) {
+        return;
+    }
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label);
+        return;
+    }
+
+    imm = tcg_const_i64(arg2);
+    tcg_gen_brcond_i64(cond, arg1, imm, label);
+    tcg_temp_free_i64(imm);
+}
+
+/* Set RET from the comparison of ARG1 with ARG2 under COND.
+   TCG_COND_ALWAYS stores the constant 1 and TCG_COND_NEVER the
+   constant 0 without emitting a comparison.  */
+void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+                         TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    switch (cond) {
+    case TCG_COND_ALWAYS:
+        tcg_gen_movi_i64(ret, 1);
+        return;
+    case TCG_COND_NEVER:
+        tcg_gen_movi_i64(ret, 0);
+        return;
+    default:
+        break;
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* The comparison result goes to the low half of RET; the high
+           half is then cleared.  */
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                         TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    } else {
+        tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    }
+}
+
+/* As tcg_gen_setcond_i64, with the constant ARG2 as the second operand.
+   The constant is materialised in a temporary for every COND.  */
+void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+                          TCGv_i64 arg1, int64_t arg2)
+{
+    TCGv_i64 imm;
+
+    imm = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, imm);
+    tcg_temp_free_i64(imm);
+}
+
+/* Multiply ARG1 by the constant ARG2 and store the product in RET.  */
+void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+    TCGv_i64 factor;
+
+    factor = tcg_const_i64(arg2);
+    tcg_gen_mul_i64(ret, arg1, factor);
+    tcg_temp_free_i64(factor);
+}
+
+/* Signed 64-bit division of ARG1 by ARG2 into RET.  Uses the div opcode
+   when available, otherwise the div2 opcode, otherwise the out-of-line
+   helper.  */
+void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 sign;
+
+    if (TCG_TARGET_HAS_div_i64) {
+        tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_div2_i64) {
+        /* div2 is also fed ARG1 >> 63 (arithmetic), i.e. the sign of
+           ARG1 replicated across a word.  */
+        sign = tcg_temp_new_i64();
+        tcg_gen_sari_i64(sign, arg1, 63);
+        tcg_gen_op5_i64(INDEX_op_div2_i64, ret, sign, arg1, sign, arg2);
+        tcg_temp_free_i64(sign);
+        return;
+    }
+    gen_helper_div_i64(ret, arg1, arg2);
+}
+
+/* Signed 64-bit remainder of ARG1 by ARG2 into RET.  Uses the rem opcode
+   when available, then div plus multiply/subtract, then the div2
+   opcode, and finally the out-of-line helper.  */
+void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 tmp;
+
+    if (TCG_TARGET_HAS_rem_i64) {
+        tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_div_i64) {
+        /* ret = arg1 - (arg1 / arg2) * arg2 */
+        tmp = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_div_i64, tmp, arg1, arg2);
+        tcg_gen_mul_i64(tmp, tmp, arg2);
+        tcg_gen_sub_i64(ret, arg1, tmp);
+        tcg_temp_free_i64(tmp);
+        return;
+    }
+    if (TCG_TARGET_HAS_div2_i64) {
+        /* div2 is also fed ARG1 >> 63 (arithmetic); RET sits in the
+           second operand slot, the temp in the first.  */
+        tmp = tcg_temp_new_i64();
+        tcg_gen_sari_i64(tmp, arg1, 63);
+        tcg_gen_op5_i64(INDEX_op_div2_i64, tmp, ret, arg1, tmp, arg2);
+        tcg_temp_free_i64(tmp);
+        return;
+    }
+    gen_helper_rem_i64(ret, arg1, arg2);
+}
+
+/* Unsigned 64-bit division of ARG1 by ARG2 into RET.  Uses the divu
+   opcode when TCG_TARGET_HAS_div_i64 is set, otherwise the divu2
+   opcode, otherwise the out-of-line helper.  */
+void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 zero;
+
+    if (TCG_TARGET_HAS_div_i64) {
+        tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_div2_i64) {
+        /* divu2 is also fed a zero word alongside ARG1.  */
+        zero = tcg_temp_new_i64();
+        tcg_gen_movi_i64(zero, 0);
+        tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, zero, arg1, zero, arg2);
+        tcg_temp_free_i64(zero);
+        return;
+    }
+    gen_helper_divu_i64(ret, arg1, arg2);
+}
+
+/* Unsigned 64-bit remainder of ARG1 by ARG2 into RET.  Uses the remu
+   opcode when available, then divu plus multiply/subtract, then the
+   divu2 opcode, and finally the out-of-line helper.  */
+void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 tmp;
+
+    if (TCG_TARGET_HAS_rem_i64) {
+        tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
+        return;
+    }
+    if (TCG_TARGET_HAS_div_i64) {
+        /* ret = arg1 - (arg1 / arg2) * arg2 */
+        tmp = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_divu_i64, tmp, arg1, arg2);
+        tcg_gen_mul_i64(tmp, tmp, arg2);
+        tcg_gen_sub_i64(ret, arg1, tmp);
+        tcg_temp_free_i64(tmp);
+        return;
+    }
+    if (TCG_TARGET_HAS_div2_i64) {
+        /* divu2 is also fed a zero word; RET sits in the second operand
+           slot, the temp in the first.  */
+        tmp = tcg_temp_new_i64();
+        tcg_gen_movi_i64(tmp, 0);
+        tcg_gen_op5_i64(INDEX_op_divu2_i64, tmp, ret, arg1, tmp, arg2);
+        tcg_temp_free_i64(tmp);
+        return;
+    }
+    gen_helper_remu_i64(ret, arg1, arg2);
+}
+
+/* Sign-extend the low 8 bits of ARG to 64 bits in RET.  */
+void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Extend within the low half, then replicate its sign bit
+           across the high half.  */
+        tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        return;
+    }
+    if (TCG_TARGET_HAS_ext8s_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
+        return;
+    }
+    /* Shift the byte to the top, then arithmetic-shift it back.  */
+    tcg_gen_shli_i64(ret, arg, 56);
+    tcg_gen_sari_i64(ret, ret, 56);
+}
+
+/* Sign-extend the low 16 bits of ARG to 64 bits in RET.  */
+void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Extend within the low half, then replicate its sign bit
+           across the high half.  */
+        tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        return;
+    }
+    if (TCG_TARGET_HAS_ext16s_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
+        return;
+    }
+    /* Shift the halfword to the top, then arithmetic-shift it back.  */
+    tcg_gen_shli_i64(ret, arg, 48);
+    tcg_gen_sari_i64(ret, ret, 48);
+}
+
+/* Sign-extend the low 32 bits of ARG to 64 bits in RET.  */
+void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Copy the low half, then replicate its sign bit across the
+           high half.  */
+        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        return;
+    }
+    if (TCG_TARGET_HAS_ext32s_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
+        return;
+    }
+    /* Shift the word to the top, then arithmetic-shift it back.  */
+    tcg_gen_shli_i64(ret, arg, 32);
+    tcg_gen_sari_i64(ret, ret, 32);
+}
+
+/* Zero-extend the low 8 bits of ARG to 64 bits in RET.  */
+void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        return;
+    }
+    if (TCG_TARGET_HAS_ext8u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
+        return;
+    }
+    /* No dedicated opcode: mask off everything above the byte.  */
+    tcg_gen_andi_i64(ret, arg, 0xffu);
+}
+
+/* Zero-extend the low 16 bits of ARG to 64 bits in RET.  */
+void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        return;
+    }
+    if (TCG_TARGET_HAS_ext16u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
+        return;
+    }
+    /* No dedicated opcode: mask off everything above the halfword.  */
+    tcg_gen_andi_i64(ret, arg, 0xffffu);
+}
+
+/* Zero-extend the low 32 bits of ARG to 64 bits in RET.  */
+void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        return;
+    }
+    if (TCG_TARGET_HAS_ext32u_i64) {
+        tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
+        return;
+    }
+    /* No dedicated opcode: mask off everything above the word.  */
+    tcg_gen_andi_i64(ret, arg, 0xffffffffu);
+}
+
+/* Swap the two low bytes of ARG into RET.
+   Note: the six high bytes of ARG are assumed to be zero.  */
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    TCGv_i64 low_byte;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        return;
+    }
+    if (TCG_TARGET_HAS_bswap16_i64) {
+        tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
+        return;
+    }
+
+    low_byte = tcg_temp_new_i64();
+
+    /* Move byte 0 up to byte 1 and byte 1 down to byte 0, then merge.  */
+    tcg_gen_ext8u_i64(low_byte, arg);
+    tcg_gen_shli_i64(low_byte, low_byte, 8);
+    tcg_gen_shri_i64(ret, arg, 8);
+    tcg_gen_or_i64(ret, ret, low_byte);
+    tcg_temp_free_i64(low_byte);
+}
+
+/* Reverse the four low bytes of ARG into RET.
+   Note: the four high bytes of ARG are assumed to be zero.  */
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    TCGv_i64 acc, part;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        return;
+    }
+    if (TCG_TARGET_HAS_bswap32_i64) {
+        tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
+        return;
+    }
+
+    acc = tcg_temp_new_i64();
+    part = tcg_temp_new_i64();
+
+    /* Byte 0 -> byte 3; drop whatever was shifted above bit 31.  */
+    tcg_gen_shli_i64(acc, arg, 24);
+    tcg_gen_ext32u_i64(acc, acc);
+
+    /* Byte 1 -> byte 2.  */
+    tcg_gen_andi_i64(part, arg, 0x0000ff00);
+    tcg_gen_shli_i64(part, part, 8);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 2 -> byte 1.  */
+    tcg_gen_shri_i64(part, arg, 8);
+    tcg_gen_andi_i64(part, part, 0x0000ff00);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 3 -> byte 0; left unmasked, see the note above.  */
+    tcg_gen_shri_i64(part, arg, 24);
+    tcg_gen_or_i64(ret, acc, part);
+    tcg_temp_free_i64(acc);
+    tcg_temp_free_i64(part);
+}
+
+/* Reverse all eight bytes of ARG into RET.  */
+void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    TCGv_i64 acc, part;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        TCGv_i32 swapped_lo, swapped_hi;
+        swapped_lo = tcg_temp_new_i32();
+        swapped_hi = tcg_temp_new_i32();
+
+        /* Byte-swap each half, then exchange the halves.  Temps are
+           used so RET may alias ARG.  */
+        tcg_gen_bswap32_i32(swapped_lo, TCGV_LOW(arg));
+        tcg_gen_bswap32_i32(swapped_hi, TCGV_HIGH(arg));
+        tcg_gen_mov_i32(TCGV_LOW(ret), swapped_hi);
+        tcg_gen_mov_i32(TCGV_HIGH(ret), swapped_lo);
+        tcg_temp_free_i32(swapped_lo);
+        tcg_temp_free_i32(swapped_hi);
+        return;
+    }
+    if (TCG_TARGET_HAS_bswap64_i64) {
+        tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
+        return;
+    }
+
+    acc = tcg_temp_new_i64();
+    part = tcg_temp_new_i64();
+
+    /* Byte 0 -> byte 7.  */
+    tcg_gen_shli_i64(acc, arg, 56);
+
+    /* Byte 1 -> byte 6.  */
+    tcg_gen_andi_i64(part, arg, 0x0000ff00);
+    tcg_gen_shli_i64(part, part, 40);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 2 -> byte 5.  */
+    tcg_gen_andi_i64(part, arg, 0x00ff0000);
+    tcg_gen_shli_i64(part, part, 24);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 3 -> byte 4.  */
+    tcg_gen_andi_i64(part, arg, 0xff000000);
+    tcg_gen_shli_i64(part, part, 8);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 4 -> byte 3.  */
+    tcg_gen_shri_i64(part, arg, 8);
+    tcg_gen_andi_i64(part, part, 0xff000000);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 5 -> byte 2.  */
+    tcg_gen_shri_i64(part, arg, 24);
+    tcg_gen_andi_i64(part, part, 0x00ff0000);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 6 -> byte 1.  */
+    tcg_gen_shri_i64(part, arg, 40);
+    tcg_gen_andi_i64(part, part, 0x0000ff00);
+    tcg_gen_or_i64(acc, acc, part);
+
+    /* Byte 7 -> byte 0; the logical shift leaves nothing to mask.  */
+    tcg_gen_shri_i64(part, arg, 56);
+    tcg_gen_or_i64(ret, acc, part);
+    tcg_temp_free_i64(acc);
+    tcg_temp_free_i64(part);
+}
+
+/* Store the bitwise complement of ARG in RET.  */
+void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+        tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+        return;
+    }
+    if (TCG_TARGET_HAS_not_i64) {
+        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
+        return;
+    }
+    /* No dedicated opcode: XOR with all ones.  */
+    tcg_gen_xori_i64(ret, arg, -1);
+}
+
+/* RET = ARG1 & ~ARG2.  */
+void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 inverted;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+        return;
+    }
+    if (TCG_TARGET_HAS_andc_i64) {
+        tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
+        return;
+    }
+
+    /* Complement ARG2 into a temp so that ARG2 itself is untouched.  */
+    inverted = tcg_temp_new_i64();
+    tcg_gen_not_i64(inverted, arg2);
+    tcg_gen_and_i64(ret, arg1, inverted);
+    tcg_temp_free_i64(inverted);
+}
+
+/* RET = ~(ARG1 ^ ARG2).  */
+void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+        return;
+    }
+    if (TCG_TARGET_HAS_eqv_i64) {
+        tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
+        return;
+    }
+    /* No dedicated opcode: XOR, then complement in place.  */
+    tcg_gen_xor_i64(ret, arg1, arg2);
+    tcg_gen_not_i64(ret, ret);
+}
+
+/* RET = ~(ARG1 & ARG2).  */
+void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+        return;
+    }
+    if (TCG_TARGET_HAS_nand_i64) {
+        tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
+        return;
+    }
+    /* No dedicated opcode: AND, then complement in place.  */
+    tcg_gen_and_i64(ret, arg1, arg2);
+    tcg_gen_not_i64(ret, ret);
+}
+
+/* RET = ~(ARG1 | ARG2).  */
+void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+        return;
+    }
+    if (TCG_TARGET_HAS_nor_i64) {
+        tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
+        return;
+    }
+    /* No dedicated opcode: OR, then complement in place.  */
+    tcg_gen_or_i64(ret, arg1, arg2);
+    tcg_gen_not_i64(ret, ret);
+}
+
+/* RET = ARG1 | ~ARG2.  */
+void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 inverted;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+        tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+        return;
+    }
+    if (TCG_TARGET_HAS_orc_i64) {
+        tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
+        return;
+    }
+
+    /* Complement ARG2 into a temp so that ARG2 itself is untouched.  */
+    inverted = tcg_temp_new_i64();
+    tcg_gen_not_i64(inverted, arg2);
+    tcg_gen_or_i64(ret, arg1, inverted);
+    tcg_temp_free_i64(inverted);
+}
+
+/* Rotate ARG1 left by the variable amount ARG2 and store it in RET.  */
+void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 upper, lower;
+
+    if (TCG_TARGET_HAS_rot_i64) {
+        tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
+        return;
+    }
+
+    /* ret = (arg1 << arg2) | (arg1 >> (64 - arg2)) */
+    upper = tcg_temp_new_i64();
+    lower = tcg_temp_new_i64();
+    tcg_gen_shl_i64(upper, arg1, arg2);
+    tcg_gen_subfi_i64(lower, 64, arg2);
+    tcg_gen_shr_i64(lower, arg1, lower);
+    tcg_gen_or_i64(ret, upper, lower);
+    tcg_temp_free_i64(upper);
+    tcg_temp_free_i64(lower);
+}
+
+/* Rotate ARG1 left by the constant ARG2 (which must be below 64) and
+   store it in RET.  A rotate by zero is emitted as a plain move.  */
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+    TCGv_i64 upper, lower;
+
+    tcg_debug_assert(arg2 < 64);
+
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+    if (TCG_TARGET_HAS_rot_i64) {
+        TCGv_i64 count = tcg_const_i64(arg2);
+        tcg_gen_rotl_i64(ret, arg1, count);
+        tcg_temp_free_i64(count);
+        return;
+    }
+
+    /* ret = (arg1 << arg2) | (arg1 >> (64 - arg2)); arg2 is non-zero
+       here, so both shift counts are below 64.  */
+    upper = tcg_temp_new_i64();
+    lower = tcg_temp_new_i64();
+    tcg_gen_shli_i64(upper, arg1, arg2);
+    tcg_gen_shri_i64(lower, arg1, 64 - arg2);
+    tcg_gen_or_i64(ret, upper, lower);
+    tcg_temp_free_i64(upper);
+    tcg_temp_free_i64(lower);
+}
+
+/* Rotate ARG1 right by the variable amount ARG2 and store it in RET.  */
+void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 lower, upper;
+
+    if (TCG_TARGET_HAS_rot_i64) {
+        tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
+        return;
+    }
+
+    /* ret = (arg1 >> arg2) | (arg1 << (64 - arg2)) */
+    lower = tcg_temp_new_i64();
+    upper = tcg_temp_new_i64();
+    tcg_gen_shr_i64(lower, arg1, arg2);
+    tcg_gen_subfi_i64(upper, 64, arg2);
+    tcg_gen_shl_i64(upper, arg1, upper);
+    tcg_gen_or_i64(ret, lower, upper);
+    tcg_temp_free_i64(lower);
+    tcg_temp_free_i64(upper);
+}
+
+/* Rotate ARG1 right by the constant ARG2 (which must be below 64) and
+   store it in RET.  Implemented as a left rotate by 64 - ARG2; a rotate
+   by zero is emitted as a plain move.  */
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+{
+    tcg_debug_assert(arg2 < 64);
+
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    }
+    tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
+}
+
+/* Replace the LEN-bit field of ARG1 that starts at bit OFS with the low
+   LEN bits of ARG2 and store the result in RET.  Requires OFS < 64 and
+   OFS + LEN <= 64.  */
+void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
+                         unsigned int ofs, unsigned int len)
+{
+    uint64_t mask;
+    TCGv_i64 field;
+
+    tcg_debug_assert(ofs < 64);
+    tcg_debug_assert(len <= 64);
+    tcg_debug_assert(ofs + len <= 64);
+
+    /* The field covers the whole word: RET is simply ARG2.  */
+    if (ofs == 0 && len == 64) {
+        tcg_gen_mov_i64(ret, arg2);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
+        return;
+    }
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* A field lying entirely in one half becomes a 32-bit deposit
+           into that half plus a copy of the other half.  */
+        if (ofs >= 32) {
+            tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
+                                TCGV_LOW(arg2), ofs - 32, len);
+            tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+            return;
+        }
+        if (ofs + len <= 32) {
+            tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
+                                TCGV_LOW(arg2), ofs, len);
+            tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+            return;
+        }
+    }
+
+    /* Generic expansion with mask, shift and OR.  */
+    mask = (1ull << len) - 1;
+    field = tcg_temp_new_i64();
+
+    if (ofs + len >= 64) {
+        /* The shift itself pushes out the excess high bits of ARG2.  */
+        tcg_gen_shli_i64(field, arg2, ofs);
+    } else {
+        tcg_gen_andi_i64(field, arg2, mask);
+        tcg_gen_shli_i64(field, field, ofs);
+    }
+    /* Clear the destination field in ARG1, then merge the new bits.  */
+    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+    tcg_gen_or_i64(ret, ret, field);
+
+    tcg_temp_free_i64(field);
+}
+
+/* RET = (C1 COND C2) ? V1 : V2, on 64-bit values.  Without a movcond
+   opcode the selection is built from the negated setcond result used
+   as a mask: (V1 & mask) | (V2 & ~mask).  */
+void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
+                         TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        TCGv_i32 flag = tcg_temp_new_i32();
+        TCGv_i32 aux = tcg_temp_new_i32();
+
+        /* Evaluate the 64-bit comparison once into a 32-bit flag.  */
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, flag,
+                         TCGV_LOW(c1), TCGV_HIGH(c1),
+                         TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+        if (TCG_TARGET_HAS_movcond_i32) {
+            /* Select each half with "flag != 0".  */
+            tcg_gen_movi_i32(aux, 0);
+            tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), flag, aux,
+                                TCGV_LOW(v1), TCGV_LOW(v2));
+            tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), flag, aux,
+                                TCGV_HIGH(v1), TCGV_HIGH(v2));
+        } else {
+            /* Negate the flag to use it as a bit mask.  */
+            tcg_gen_neg_i32(flag, flag);
+
+            tcg_gen_and_i32(aux, TCGV_LOW(v1), flag);
+            tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), flag);
+            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), aux);
+
+            tcg_gen_and_i32(aux, TCGV_HIGH(v1), flag);
+            tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), flag);
+            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), aux);
+        }
+        tcg_temp_free_i32(flag);
+        tcg_temp_free_i32(aux);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_movcond_i64) {
+        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+        return;
+    }
+
+    {
+        TCGv_i64 mask = tcg_temp_new_i64();
+        TCGv_i64 picked = tcg_temp_new_i64();
+
+        tcg_gen_setcond_i64(cond, mask, c1, c2);
+        tcg_gen_neg_i64(mask, mask);
+        tcg_gen_and_i64(picked, v1, mask);
+        tcg_gen_andc_i64(ret, v2, mask);
+        tcg_gen_or_i64(ret, ret, picked);
+        tcg_temp_free_i64(mask);
+        tcg_temp_free_i64(picked);
+    }
+}
+
+/* Double-word addition: (RH:RL) = (AH:AL) + (BH:BL).  */
+void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+    TCGv_i64 sum_lo, carry;
+
+    if (TCG_TARGET_HAS_add2_i64) {
+        tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
+        return;
+    }
+
+    sum_lo = tcg_temp_new_i64();
+    carry = tcg_temp_new_i64();
+    tcg_gen_add_i64(sum_lo, al, bl);
+    /* The low sum wrapped iff it is (unsigned) below an addend.  */
+    tcg_gen_setcond_i64(TCG_COND_LTU, carry, sum_lo, al);
+    tcg_gen_add_i64(rh, ah, bh);
+    tcg_gen_add_i64(rh, rh, carry);
+    /* RL is written last, from the temp, after all inputs were read.  */
+    tcg_gen_mov_i64(rl, sum_lo);
+    tcg_temp_free_i64(sum_lo);
+    tcg_temp_free_i64(carry);
+}
+
+/* Double-word subtraction: (RH:RL) = (AH:AL) - (BH:BL).  */
+void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+                      TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+    TCGv_i64 diff_lo, borrow;
+
+    if (TCG_TARGET_HAS_sub2_i64) {
+        tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
+        return;
+    }
+
+    diff_lo = tcg_temp_new_i64();
+    borrow = tcg_temp_new_i64();
+    tcg_gen_sub_i64(diff_lo, al, bl);
+    /* A borrow is needed iff AL is (unsigned) below BL.  */
+    tcg_gen_setcond_i64(TCG_COND_LTU, borrow, al, bl);
+    tcg_gen_sub_i64(rh, ah, bh);
+    tcg_gen_sub_i64(rh, rh, borrow);
+    /* RL is written last, from the temp, after all inputs were read.  */
+    tcg_gen_mov_i64(rl, diff_lo);
+    tcg_temp_free_i64(diff_lo);
+    tcg_temp_free_i64(borrow);
+}
+
+/* Unsigned 64x64 multiply of ARG1 by ARG2: the low word of the product
+   goes to RL and the high word to RH.  */
+void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    TCGv_i64 low;
+
+    if (TCG_TARGET_HAS_mulu2_i64) {
+        tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
+        return;
+    }
+
+    /* Build the two words separately.  The low word is staged in a temp
+       and only moved to RL once the high word has been produced.  */
+    low = tcg_temp_new_i64();
+    if (TCG_TARGET_HAS_muluh_i64) {
+        tcg_gen_op3_i64(INDEX_op_mul_i64, low, arg1, arg2);
+        tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
+    } else {
+        tcg_gen_mul_i64(low, arg1, arg2);
+        gen_helper_muluh_i64(rh, arg1, arg2);
+    }
+    tcg_gen_mov_i64(rl, low);
+    tcg_temp_free_i64(low);
+}
+
+/* Signed 64x64 multiply of ARG1 by ARG2: the low word of the product
+   goes to RL and the high word to RH.  */
+void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    if (TCG_TARGET_HAS_muls2_i64) {
+        tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_mulsh_i64) {
+        TCGv_i64 low = tcg_temp_new_i64();
+        tcg_gen_op3_i64(INDEX_op_mul_i64, low, arg1, arg2);
+        tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, low);
+        tcg_temp_free_i64(low);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
+        TCGv_i64 prod_lo = tcg_temp_new_i64();
+        TCGv_i64 prod_hi = tcg_temp_new_i64();
+        TCGv_i64 adj1 = tcg_temp_new_i64();
+        TCGv_i64 adj2 = tcg_temp_new_i64();
+
+        /* Start from the unsigned product.  */
+        tcg_gen_mulu2_i64(prod_lo, prod_hi, arg1, arg2);
+        /* Adjust for negative inputs: for each negative operand,
+           subtract the other operand from the high word.  */
+        tcg_gen_sari_i64(adj1, arg1, 63);
+        tcg_gen_sari_i64(adj2, arg2, 63);
+        tcg_gen_and_i64(adj1, adj1, arg2);
+        tcg_gen_and_i64(adj2, adj2, arg1);
+        tcg_gen_sub_i64(rh, prod_hi, adj1);
+        tcg_gen_sub_i64(rh, rh, adj2);
+        tcg_gen_mov_i64(rl, prod_lo);
+        tcg_temp_free_i64(prod_lo);
+        tcg_temp_free_i64(prod_hi);
+        tcg_temp_free_i64(adj1);
+        tcg_temp_free_i64(adj2);
+        return;
+    }
+
+    {
+        /* Last resort: inline low multiply plus a helper call for the
+           high word.  */
+        TCGv_i64 low = tcg_temp_new_i64();
+        tcg_gen_mul_i64(low, arg1, arg2);
+        gen_helper_mulsh_i64(rh, arg1, arg2);
+        tcg_gen_mov_i64(rl, low);
+        tcg_temp_free_i64(low);
+    }
+}
+
+/* Size changing operations.  */
+
+/* RET = the low 32 bits of (ARG >> COUNT), using a logical shift.
+   COUNT must be below 64.  */
+void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count)
+{
+    TCGv_i64 shifted;
+
+    tcg_debug_assert(count < 64);
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        if (count == 0) {
+            tcg_gen_mov_i32(ret, TCGV_LOW(arg));
+            return;
+        }
+        if (count >= 32) {
+            /* Only bits of the high half can reach the result.  */
+            tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32);
+            return;
+        }
+        shifted = tcg_temp_new_i64();
+        tcg_gen_shri_i64(shifted, arg, count);
+        tcg_gen_mov_i32(ret, TCGV_LOW(shifted));
+        tcg_temp_free_i64(shifted);
+        return;
+    }
+
+    if (TCG_TARGET_HAS_trunc_shr_i32) {
+        tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret,
+                         MAKE_TCGV_I32(GET_TCGV_I64(arg)), count);
+        return;
+    }
+    if (count == 0) {
+        /* Re-type the 64-bit value as 32-bit and copy it.  */
+        tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg)));
+        return;
+    }
+
+    shifted = tcg_temp_new_i64();
+    tcg_gen_shri_i64(shifted, arg, count);
+    tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(shifted)));
+    tcg_temp_free_i64(shifted);
+}
+
+/* Zero-extend the 32-bit value ARG into the 64-bit value RET.  */
+void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+        return;
+    }
+    /* Note: this assumes the target supports moves between 32-bit and
+       64-bit registers, since ARG is simply re-typed as a 64-bit
+       value.  */
+    tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+}
+
+/* Sign-extend the 32-bit value ARG into the 64-bit value RET.  */
+void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Copy into the low half, then replicate its sign bit across
+           the high half.  */
+        tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+        return;
+    }
+    /* Note: this assumes the target supports moves between 32-bit and
+       64-bit registers, since ARG is simply re-typed as a 64-bit
+       value.  */
+    tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+}
+
+/* Build the 64-bit value DEST from two 32-bit values: LOW supplies bits
+   0..31 and HIGH supplies bits 32..63.  */
+void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
+{
+    TCGv_i64 upper;
+
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_mov_i32(TCGV_LOW(dest), low);
+        tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+        return;
+    }
+
+    upper = tcg_temp_new_i64();
+    /* The extensions exist only for type correctness; target-specific
+       knowledge might allow something better.  */
+    tcg_gen_extu_i32_i64(upper, high);
+    tcg_gen_extu_i32_i64(dest, low);
+    /* Prefer deposit when the target has it.  Otherwise rely on both
+       values having just been zero-extended: a shift and an OR are
+       enough.  */
+    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+        tcg_gen_deposit_i64(dest, dest, upper, 32, 32);
+    } else {
+        tcg_gen_shli_i64(upper, upper, 32);
+        tcg_gen_or_i64(dest, dest, upper);
+    }
+    tcg_temp_free_i64(upper);
+}
+
+/* Split the 64-bit value ARG into two 32-bit values: LO receives bits
+   0..31 and HI receives bits 32..63.  */
+void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_mov_i32(lo, TCGV_LOW(arg));
+        tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
+        return;
+    }
+    tcg_gen_trunc_shr_i64_i32(lo, arg, 0);
+    tcg_gen_trunc_shr_i64_i32(hi, arg, 32);
+}
+
+/* Split ARG into its two 32-bit halves, each held in a 64-bit value:
+   LO = ARG & 0xffffffff, HI = ARG >> 32 (logical).  LO is written
+   before ARG is read again for HI.  */
+void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+    tcg_gen_ext32u_i64(lo, arg);
+
+    tcg_gen_shri_i64(hi, arg, 32);
+}
+
+/* QEMU specific operations.  */
+
+/* Emit a goto_tb op for chained exit number IDX.  Only exits 0 and 1
+   are supported.  */
+void tcg_gen_goto_tb(unsigned idx)
+{
+    tcg_debug_assert(idx <= 1);
+#ifdef CONFIG_DEBUG_TCG
+    {
+        /* Each numbered exit may be issued only once: check that this
+           one has not been seen before, then record it.  */
+        const int exit_bit = 1 << idx;
+
+        tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & exit_bit) == 0);
+        tcg_ctx.goto_tb_issue_mask |= exit_bit;
+    }
+#endif
+    tcg_gen_op1i(INDEX_op_goto_tb, idx);
+}
+
+/* Strip flags from OP that have no effect for the access described:
+   byte swapping of single bytes, sign extension of 32-bit values in a
+   32-bit operation, and sign extension of any store.  IS64 says the
+   value is 64 bits wide, ST that the access is a store.  A 64-bit
+   access size with !IS64 aborts.  */
+static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
+{
+    const int size = op & MO_SIZE;
+
+    if (size == MO_8) {
+        op &= ~MO_BSWAP;
+    } else if (size == MO_32) {
+        if (!is64) {
+            op &= ~MO_SIGN;
+        }
+    } else if (size == MO_64) {
+        if (!is64) {
+            tcg_abort();
+        }
+    }
+    /* MO_16 needs no adjustment.  */
+
+    if (st) {
+        op &= ~MO_SIGN;
+    }
+    return op;
+}
+
+/* Emit the memory op OPC for the 32-bit value VAL at ADDR, with MEMOP
+   and IDX as trailing constant arguments.  The operand layout depends
+   on TARGET_LONG_BITS (width of ADDR) and TCG_TARGET_REG_BITS.  */
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
+                         TCGMemOp memop, TCGArg idx)
+{
+#if TARGET_LONG_BITS == 32
+    tcg_gen_op4ii_i32(opc, val, addr, memop, idx);
+#else
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* ADDR is passed as two 32-bit halves.  */
+        tcg_gen_op5ii_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr),
+                          memop, idx);
+        return;
+    }
+    /* Mixed 32-bit value / 64-bit address: use the untyped emitter.  */
+    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr),
+                memop, idx);
+#endif
+}
+
+/* Emit the memory op OPC for the 64-bit value VAL at ADDR, with MEMOP
+   and IDX as trailing constant arguments.  The operand layout depends
+   on TARGET_LONG_BITS (width of ADDR) and TCG_TARGET_REG_BITS.  */
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
+                         TCGMemOp memop, TCGArg idx)
+{
+#if TARGET_LONG_BITS == 32
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* VAL is passed as two 32-bit halves.  */
+        tcg_gen_op5ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
+                          addr, memop, idx);
+        return;
+    }
+    /* Mixed 64-bit value / 32-bit address: use the untyped emitter.  */
+    tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr),
+                memop, idx);
+#else
+    if (TCG_TARGET_REG_BITS == 32) {
+        /* Both VAL and ADDR are passed as pairs of 32-bit halves.  */
+        tcg_gen_op6ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
+                          TCGV_LOW(addr), TCGV_HIGH(addr), memop, idx);
+        return;
+    }
+    tcg_gen_op4ii_i64(opc, val, addr, memop, idx);
+#endif
+}
+
+/* Emit a qemu_ld_i32 op that loads into VAL from ADDR.  MEMOP is
+   canonicalized (as a 32-bit load) before being attached to the op
+   together with IDX.  */
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    TCGMemOp canon = tcg_canonicalize_memop(memop, false, false);
+
+    gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, canon, idx);
+}
+
+/* Emit a qemu_st_i32 op that stores VAL to ADDR.  MEMOP is
+   canonicalized (as a 32-bit store) before being attached to the op
+   together with IDX.  */
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    TCGMemOp canon = tcg_canonicalize_memop(memop, false, true);
+
+    gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, canon, idx);
+}
+
+/* Emit a load into the 64-bit value VAL from ADDR.  When
+   TCG_TARGET_REG_BITS is 32 and the access is narrower than 64 bits,
+   the load is done as a 32-bit load into the low half and the high half
+   is filled by sign- or zero-extension according to MO_SIGN.  */
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
+        if (!(memop & MO_SIGN)) {
+            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
+        } else {
+            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
+        }
+    } else {
+        memop = tcg_canonicalize_memop(memop, true, false);
+        gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
+    }
+}
+
+/* Emit a store of the 64-bit value VAL to ADDR.  When
+   TCG_TARGET_REG_BITS is 32 and the access is narrower than 64 bits,
+   only the low half of VAL is stored, via the 32-bit store.  */
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
+{
+    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
+    } else {
+        memop = tcg_canonicalize_memop(memop, true, true);
+        gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
+    }
+}

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 359 - 2096
tcg/tcg-op.h


+ 0 - 9
tcg/tcg-opc.h

@@ -27,15 +27,6 @@
  */
 
 /* predefined ops */
-DEF(end, 0, 0, 0, TCG_OPF_NOT_PRESENT) /* must be kept first */
-DEF(nop, 0, 0, 0, TCG_OPF_NOT_PRESENT)
-DEF(nop1, 0, 0, 1, TCG_OPF_NOT_PRESENT)
-DEF(nop2, 0, 0, 2, TCG_OPF_NOT_PRESENT)
-DEF(nop3, 0, 0, 3, TCG_OPF_NOT_PRESENT)
-
-/* variable number of parameters */
-DEF(nopn, 0, 0, 1, TCG_OPF_NOT_PRESENT)
-
 DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
 DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
 

+ 175 - 357
tcg/tcg.c

@@ -407,7 +407,6 @@ void tcg_func_start(TCGContext *s)
     /* No temps have been previously allocated for size or locality.  */
     memset(s->free_temps, 0, sizeof(s->free_temps));
 
-    s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
     s->nb_labels = 0;
     s->current_frame_offset = s->frame_start;
 
@@ -415,8 +414,10 @@ void tcg_func_start(TCGContext *s)
     s->goto_tb_issue_mask = 0;
 #endif
 
-    s->gen_opc_ptr = s->gen_opc_buf;
-    s->gen_opparam_ptr = s->gen_opparam_buf;
+    s->gen_first_op_idx = 0;
+    s->gen_last_op_idx = -1;
+    s->gen_next_op_idx = 0;
+    s->gen_next_parm_idx = 0;
 
     s->be = tcg_malloc(sizeof(TCGBackendData));
 }
@@ -703,9 +704,8 @@ int tcg_check_temp_count(void)
 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                    int nargs, TCGArg *args)
 {
-    int i, real_args, nb_rets;
+    int i, real_args, nb_rets, pi, pi_first;
     unsigned sizemask, flags;
-    TCGArg *nparam;
     TCGHelperInfo *info;
 
     info = g_hash_table_lookup(s->helpers, (gpointer)func);
@@ -758,8 +758,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
     }
 #endif /* TCG_TARGET_EXTEND_ARGS */
 
-    *s->gen_opc_ptr++ = INDEX_op_call;
-    nparam = s->gen_opparam_ptr++;
+    pi_first = pi = s->gen_next_parm_idx;
     if (ret != TCG_CALL_DUMMY_ARG) {
 #if defined(__sparc__) && !defined(__arch64__) \
     && !defined(CONFIG_TCG_INTERPRETER)
@@ -769,25 +768,25 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                two return temporaries, and reassemble below.  */
             retl = tcg_temp_new_i64();
             reth = tcg_temp_new_i64();
-            *s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
-            *s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
+            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
+            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
             nb_rets = 2;
         } else {
-            *s->gen_opparam_ptr++ = ret;
+            s->gen_opparam_buf[pi++] = ret;
             nb_rets = 1;
         }
 #else
         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
 #ifdef HOST_WORDS_BIGENDIAN
-            *s->gen_opparam_ptr++ = ret + 1;
-            *s->gen_opparam_ptr++ = ret;
+            s->gen_opparam_buf[pi++] = ret + 1;
+            s->gen_opparam_buf[pi++] = ret;
 #else
-            *s->gen_opparam_ptr++ = ret;
-            *s->gen_opparam_ptr++ = ret + 1;
+            s->gen_opparam_buf[pi++] = ret;
+            s->gen_opparam_buf[pi++] = ret + 1;
 #endif
             nb_rets = 2;
         } else {
-            *s->gen_opparam_ptr++ = ret;
+            s->gen_opparam_buf[pi++] = ret;
             nb_rets = 1;
         }
 #endif
@@ -801,7 +800,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
             /* some targets want aligned 64 bit args */
             if (real_args & 1) {
-                *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
                 real_args++;
             }
 #endif
@@ -816,26 +815,42 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
 	       have to get more complicated to differentiate between
 	       stack arguments and register arguments.  */
 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
-            *s->gen_opparam_ptr++ = args[i] + 1;
-            *s->gen_opparam_ptr++ = args[i];
+            s->gen_opparam_buf[pi++] = args[i] + 1;
+            s->gen_opparam_buf[pi++] = args[i];
 #else
-            *s->gen_opparam_ptr++ = args[i];
-            *s->gen_opparam_ptr++ = args[i] + 1;
+            s->gen_opparam_buf[pi++] = args[i];
+            s->gen_opparam_buf[pi++] = args[i] + 1;
 #endif
             real_args += 2;
             continue;
         }
 
-        *s->gen_opparam_ptr++ = args[i];
+        s->gen_opparam_buf[pi++] = args[i];
         real_args++;
     }
-    *s->gen_opparam_ptr++ = (uintptr_t)func;
-    *s->gen_opparam_ptr++ = flags;
+    s->gen_opparam_buf[pi++] = (uintptr_t)func;
+    s->gen_opparam_buf[pi++] = flags;
+
+    i = s->gen_next_op_idx;
+    tcg_debug_assert(i < OPC_BUF_SIZE);
+    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
+
+    /* Set links for sequential allocation during translation.  */
+    s->gen_op_buf[i] = (TCGOp){
+        .opc = INDEX_op_call,
+        .callo = nb_rets,
+        .calli = real_args,
+        .args = pi_first,
+        .prev = i - 1,
+        .next = i + 1
+    };
 
-    *nparam = (nb_rets << 16) | real_args;
+    /* Make sure the calli field didn't overflow.  */
+    tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
 
-    /* total parameters, needed to go backward in the instruction stream */
-    *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+    s->gen_last_op_idx = i;
+    s->gen_next_op_idx = i + 1;
+    s->gen_next_parm_idx = pi;
 
 #if defined(__sparc__) && !defined(__arch64__) \
     && !defined(CONFIG_TCG_INTERPRETER)
@@ -870,143 +885,6 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
 #endif /* TCG_TARGET_EXTEND_ARGS */
 }
 
-#if TCG_TARGET_REG_BITS == 32
-void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
-                        int c, int right, int arith)
-{
-    if (c == 0) {
-        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
-        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
-    } else if (c >= 32) {
-        c -= 32;
-        if (right) {
-            if (arith) {
-                tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
-                tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
-            } else {
-                tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
-                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
-            }
-        } else {
-            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
-            tcg_gen_movi_i32(TCGV_LOW(ret), 0);
-        }
-    } else {
-        TCGv_i32 t0, t1;
-
-        t0 = tcg_temp_new_i32();
-        t1 = tcg_temp_new_i32();
-        if (right) {
-            tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
-            if (arith)
-                tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
-            else
-                tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
-            tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
-            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
-            tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
-        } else {
-            tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
-            /* Note: ret can be the same as arg1, so we use t1 */
-            tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
-            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
-            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
-            tcg_gen_mov_i32(TCGV_LOW(ret), t1);
-        }
-        tcg_temp_free_i32(t0);
-        tcg_temp_free_i32(t1);
-    }
-}
-#endif
-
-static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
-{
-    switch (op & MO_SIZE) {
-    case MO_8:
-        op &= ~MO_BSWAP;
-        break;
-    case MO_16:
-        break;
-    case MO_32:
-        if (!is64) {
-            op &= ~MO_SIGN;
-        }
-        break;
-    case MO_64:
-        if (!is64) {
-            tcg_abort();
-        }
-        break;
-    }
-    if (st) {
-        op &= ~MO_SIGN;
-    }
-    return op;
-}
-
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
-{
-    memop = tcg_canonicalize_memop(memop, 0, 0);
-
-    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
-    tcg_add_param_i32(val);
-    tcg_add_param_tl(addr);
-    *tcg_ctx.gen_opparam_ptr++ = memop;
-    *tcg_ctx.gen_opparam_ptr++ = idx;
-}
-
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
-{
-    memop = tcg_canonicalize_memop(memop, 0, 1);
-
-    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
-    tcg_add_param_i32(val);
-    tcg_add_param_tl(addr);
-    *tcg_ctx.gen_opparam_ptr++ = memop;
-    *tcg_ctx.gen_opparam_ptr++ = idx;
-}
-
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
-{
-    memop = tcg_canonicalize_memop(memop, 1, 0);
-
-#if TCG_TARGET_REG_BITS == 32
-    if ((memop & MO_SIZE) < MO_64) {
-        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
-        if (memop & MO_SIGN) {
-            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
-        } else {
-            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
-        }
-        return;
-    }
-#endif
-
-    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
-    tcg_add_param_i64(val);
-    tcg_add_param_tl(addr);
-    *tcg_ctx.gen_opparam_ptr++ = memop;
-    *tcg_ctx.gen_opparam_ptr++ = idx;
-}
-
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
-{
-    memop = tcg_canonicalize_memop(memop, 1, 1);
-
-#if TCG_TARGET_REG_BITS == 32
-    if ((memop & MO_SIZE) < MO_64) {
-        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
-        return;
-    }
-#endif
-
-    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
-    tcg_add_param_i64(val);
-    tcg_add_param_tl(addr);
-    *tcg_ctx.gen_opparam_ptr++ = memop;
-    *tcg_ctx.gen_opparam_ptr++ = idx;
-}
-
 static void tcg_reg_alloc_start(TCGContext *s)
 {
     int i;
@@ -1109,20 +987,21 @@ static const char * const ldst_name[] =
 
 void tcg_dump_ops(TCGContext *s)
 {
-    const uint16_t *opc_ptr;
-    const TCGArg *args;
-    TCGArg arg;
-    TCGOpcode c;
-    int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
-    const TCGOpDef *def;
     char buf[128];
+    TCGOp *op;
+    int oi;
 
-    first_insn = 1;
-    opc_ptr = s->gen_opc_buf;
-    args = s->gen_opparam_buf;
-    while (opc_ptr < s->gen_opc_ptr) {
-        c = *opc_ptr++;
+    for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+        int i, k, nb_oargs, nb_iargs, nb_cargs;
+        const TCGOpDef *def;
+        const TCGArg *args;
+        TCGOpcode c;
+
+        op = &s->gen_op_buf[oi];
+        c = op->opc;
         def = &tcg_op_defs[c];
+        args = &s->gen_opparam_buf[op->args];
+
         if (c == INDEX_op_debug_insn_start) {
             uint64_t pc;
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -1130,21 +1009,14 @@ void tcg_dump_ops(TCGContext *s)
 #else
             pc = args[0];
 #endif
-            if (!first_insn) {
+            if (oi != s->gen_first_op_idx) {
                 qemu_log("\n");
             }
             qemu_log(" ---- 0x%" PRIx64, pc);
-            first_insn = 0;
-            nb_oargs = def->nb_oargs;
-            nb_iargs = def->nb_iargs;
-            nb_cargs = def->nb_cargs;
         } else if (c == INDEX_op_call) {
-            TCGArg arg;
-
             /* variable number of arguments */
-            arg = *args++;
-            nb_oargs = arg >> 16;
-            nb_iargs = arg & 0xffff;
+            nb_oargs = op->callo;
+            nb_iargs = op->calli;
             nb_cargs = def->nb_cargs;
 
             /* function name, flags, out args */
@@ -1165,26 +1037,20 @@ void tcg_dump_ops(TCGContext *s)
             }
         } else {
             qemu_log(" %s ", def->name);
-            if (c == INDEX_op_nopn) {
-                /* variable number of arguments */
-                nb_cargs = *args;
-                nb_oargs = 0;
-                nb_iargs = 0;
-            } else {
-                nb_oargs = def->nb_oargs;
-                nb_iargs = def->nb_iargs;
-                nb_cargs = def->nb_cargs;
-            }
-            
+
+            nb_oargs = def->nb_oargs;
+            nb_iargs = def->nb_iargs;
+            nb_cargs = def->nb_cargs;
+
             k = 0;
-            for(i = 0; i < nb_oargs; i++) {
+            for (i = 0; i < nb_oargs; i++) {
                 if (k != 0) {
                     qemu_log(",");
                 }
                 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                    args[k++]));
             }
-            for(i = 0; i < nb_iargs; i++) {
+            for (i = 0; i < nb_iargs; i++) {
                 if (k != 0) {
                     qemu_log(",");
                 }
@@ -1222,16 +1088,14 @@ void tcg_dump_ops(TCGContext *s)
                 i = 0;
                 break;
             }
-            for(; i < nb_cargs; i++) {
+            for (; i < nb_cargs; i++) {
                 if (k != 0) {
                     qemu_log(",");
                 }
-                arg = args[k++];
-                qemu_log("$0x%" TCG_PRIlx, arg);
+                qemu_log("$0x%" TCG_PRIlx, args[k++]);
             }
         }
         qemu_log("\n");
-        args += nb_iargs + nb_oargs + nb_cargs;
     }
 }
 
@@ -1380,21 +1244,30 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
 #endif
 }
 
-#ifdef USE_LIVENESS_ANALYSIS
-
-/* set a nop for an operation using 'nb_args' */
-static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr, 
-                               TCGArg *args, int nb_args)
+void tcg_op_remove(TCGContext *s, TCGOp *op)
 {
-    if (nb_args == 0) {
-        *opc_ptr = INDEX_op_nop;
+    int next = op->next;
+    int prev = op->prev;
+
+    if (next >= 0) {
+        s->gen_op_buf[next].prev = prev;
+    } else {
+        s->gen_last_op_idx = prev;
+    }
+    if (prev >= 0) {
+        s->gen_op_buf[prev].next = next;
     } else {
-        *opc_ptr = INDEX_op_nopn;
-        args[0] = nb_args;
-        args[nb_args - 1] = nb_args;
+        s->gen_first_op_idx = next;
     }
+
+    memset(op, -1, sizeof(*op));
+
+#ifdef CONFIG_PROFILER
+    s->del_op_count++;
+#endif
 }
 
+#ifdef USE_LIVENESS_ANALYSIS
 /* liveness analysis: end of function: all temps are dead, and globals
    should be in memory. */
 static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
@@ -1424,19 +1297,10 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
 {
-    int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
-    TCGOpcode op, op_new, op_new2;
-    TCGArg *args, arg;
-    const TCGOpDef *def;
     uint8_t *dead_temps, *mem_temps;
-    uint16_t dead_args;
-    uint8_t sync_args;
-    bool have_op_new2;
-    
-    s->gen_opc_ptr++; /* skip end */
-
-    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
+    int oi, oi_prev, nb_ops;
 
+    nb_ops = s->gen_next_op_idx;
     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
     s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
     
@@ -1444,25 +1308,31 @@ static void tcg_liveness_analysis(TCGContext *s)
     mem_temps = tcg_malloc(s->nb_temps);
     tcg_la_func_end(s, dead_temps, mem_temps);
 
-    args = s->gen_opparam_ptr;
-    op_index = nb_ops - 1;
-    while (op_index >= 0) {
-        op = s->gen_opc_buf[op_index];
-        def = &tcg_op_defs[op];
-        switch(op) {
+    for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
+        int i, nb_iargs, nb_oargs;
+        TCGOpcode opc_new, opc_new2;
+        bool have_opc_new2;
+        uint16_t dead_args;
+        uint8_t sync_args;
+        TCGArg arg;
+
+        TCGOp * const op = &s->gen_op_buf[oi];
+        TCGArg * const args = &s->gen_opparam_buf[op->args];
+        TCGOpcode opc = op->opc;
+        const TCGOpDef *def = &tcg_op_defs[opc];
+
+        oi_prev = op->prev;
+
+        switch (opc) {
         case INDEX_op_call:
             {
                 int call_flags;
 
-                nb_args = args[-1];
-                args -= nb_args;
-                arg = *args++;
-                nb_iargs = arg & 0xffff;
-                nb_oargs = arg >> 16;
+                nb_oargs = op->callo;
+                nb_iargs = op->calli;
                 call_flags = args[nb_oargs + nb_iargs + 1];
 
-                /* pure functions can be removed if their result is not
-                   used */
+                /* pure functions can be removed if their result is unused */
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
@@ -1470,8 +1340,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                             goto do_not_remove_call;
                         }
                     }
-                    tcg_set_nop(s, s->gen_opc_buf + op_index,
-                                args - 1, nb_args);
+                    goto do_remove;
                 } else {
                 do_not_remove_call:
 
@@ -1510,41 +1379,31 @@ static void tcg_liveness_analysis(TCGContext *s)
                             dead_temps[arg] = 0;
                         }
                     }
-                    s->op_dead_args[op_index] = dead_args;
-                    s->op_sync_args[op_index] = sync_args;
+                    s->op_dead_args[oi] = dead_args;
+                    s->op_sync_args[oi] = sync_args;
                 }
-                args--;
             }
             break;
         case INDEX_op_debug_insn_start:
-            args -= def->nb_args;
-            break;
-        case INDEX_op_nopn:
-            nb_args = args[-1];
-            args -= nb_args;
             break;
         case INDEX_op_discard:
-            args--;
             /* mark the temporary as dead */
             dead_temps[args[0]] = 1;
             mem_temps[args[0]] = 0;
             break;
-        case INDEX_op_end:
-            break;
 
         case INDEX_op_add2_i32:
-            op_new = INDEX_op_add_i32;
+            opc_new = INDEX_op_add_i32;
             goto do_addsub2;
         case INDEX_op_sub2_i32:
-            op_new = INDEX_op_sub_i32;
+            opc_new = INDEX_op_sub_i32;
             goto do_addsub2;
         case INDEX_op_add2_i64:
-            op_new = INDEX_op_add_i64;
+            opc_new = INDEX_op_add_i64;
             goto do_addsub2;
         case INDEX_op_sub2_i64:
-            op_new = INDEX_op_sub_i64;
+            opc_new = INDEX_op_sub_i64;
         do_addsub2:
-            args -= 6;
             nb_iargs = 4;
             nb_oargs = 2;
             /* Test if the high part of the operation is dead, but not
@@ -1555,12 +1414,11 @@ static void tcg_liveness_analysis(TCGContext *s)
                 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                     goto do_remove;
                 }
-                /* Create the single operation plus nop.  */
-                s->gen_opc_buf[op_index] = op = op_new;
+                /* Replace the opcode and adjust the args in place,
+                   leaving 3 unused args at the end.  */
+                op->opc = opc = opc_new;
                 args[1] = args[2];
                 args[2] = args[4];
-                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
-                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
                 /* Fall through and mark the single-word operation live.  */
                 nb_iargs = 2;
                 nb_oargs = 1;
@@ -1568,27 +1426,26 @@ static void tcg_liveness_analysis(TCGContext *s)
             goto do_not_remove;
 
         case INDEX_op_mulu2_i32:
-            op_new = INDEX_op_mul_i32;
-            op_new2 = INDEX_op_muluh_i32;
-            have_op_new2 = TCG_TARGET_HAS_muluh_i32;
+            opc_new = INDEX_op_mul_i32;
+            opc_new2 = INDEX_op_muluh_i32;
+            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
             goto do_mul2;
         case INDEX_op_muls2_i32:
-            op_new = INDEX_op_mul_i32;
-            op_new2 = INDEX_op_mulsh_i32;
-            have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
+            opc_new = INDEX_op_mul_i32;
+            opc_new2 = INDEX_op_mulsh_i32;
+            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
             goto do_mul2;
         case INDEX_op_mulu2_i64:
-            op_new = INDEX_op_mul_i64;
-            op_new2 = INDEX_op_muluh_i64;
-            have_op_new2 = TCG_TARGET_HAS_muluh_i64;
+            opc_new = INDEX_op_mul_i64;
+            opc_new2 = INDEX_op_muluh_i64;
+            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
             goto do_mul2;
         case INDEX_op_muls2_i64:
-            op_new = INDEX_op_mul_i64;
-            op_new2 = INDEX_op_mulsh_i64;
-            have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
+            opc_new = INDEX_op_mul_i64;
+            opc_new2 = INDEX_op_mulsh_i64;
+            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
             goto do_mul2;
         do_mul2:
-            args -= 4;
             nb_iargs = 2;
             nb_oargs = 2;
             if (dead_temps[args[1]] && !mem_temps[args[1]]) {
@@ -1597,28 +1454,25 @@ static void tcg_liveness_analysis(TCGContext *s)
                     goto do_remove;
                 }
                 /* The high part of the operation is dead; generate the low. */
-                s->gen_opc_buf[op_index] = op = op_new;
+                op->opc = opc = opc_new;
                 args[1] = args[2];
                 args[2] = args[3];
-            } else if (have_op_new2 && dead_temps[args[0]]
+            } else if (have_opc_new2 && dead_temps[args[0]]
                        && !mem_temps[args[0]]) {
-                /* The low part of the operation is dead; generate the high.  */
-                s->gen_opc_buf[op_index] = op = op_new2;
+                /* The low part of the operation is dead; generate the high. */
+                op->opc = opc = opc_new2;
                 args[0] = args[1];
                 args[1] = args[2];
                 args[2] = args[3];
             } else {
                 goto do_not_remove;
             }
-            assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
-            tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
             /* Mark the single-word operation live.  */
             nb_oargs = 1;
             goto do_not_remove;
 
         default:
             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
-            args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
 
@@ -1626,24 +1480,20 @@ static void tcg_liveness_analysis(TCGContext *s)
                its outputs are dead. We assume that nb_oargs == 0
                implies side effects */
             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
-                for(i = 0; i < nb_oargs; i++) {
+                for (i = 0; i < nb_oargs; i++) {
                     arg = args[i];
                     if (!dead_temps[arg] || mem_temps[arg]) {
                         goto do_not_remove;
                     }
                 }
             do_remove:
-                tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
-#ifdef CONFIG_PROFILER
-                s->del_op_count++;
-#endif
+                tcg_op_remove(s, op);
             } else {
             do_not_remove:
-
                 /* output args are dead */
                 dead_args = 0;
                 sync_args = 0;
-                for(i = 0; i < nb_oargs; i++) {
+                for (i = 0; i < nb_oargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
                         dead_args |= (1 << i);
@@ -1664,23 +1514,18 @@ static void tcg_liveness_analysis(TCGContext *s)
                 }
 
                 /* input args are live */
-                for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
                         dead_args |= (1 << i);
                     }
                     dead_temps[arg] = 0;
                 }
-                s->op_dead_args[op_index] = dead_args;
-                s->op_sync_args[op_index] = sync_args;
+                s->op_dead_args[oi] = dead_args;
+                s->op_sync_args[oi] = sync_args;
             }
             break;
         }
-        op_index--;
-    }
-
-    if (args != s->gen_opparam_buf) {
-        tcg_abort();
     }
 }
 #else
@@ -2247,11 +2092,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
 #define STACK_DIR(x) (x)
 #endif
 
-static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
-                              TCGOpcode opc, const TCGArg *args,
-                              uint16_t dead_args, uint8_t sync_args)
+static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
+                               const TCGArg * const args, uint16_t dead_args,
+                               uint8_t sync_args)
 {
-    int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
+    int flags, nb_regs, i, reg;
     TCGArg arg;
     TCGTemp *ts;
     intptr_t stack_offset;
@@ -2260,22 +2105,16 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
     int allocate_args;
     TCGRegSet allocated_regs;
 
-    arg = *args++;
-
-    nb_oargs = arg >> 16;
-    nb_iargs = arg & 0xffff;
-    nb_params = nb_iargs;
-
     func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
     flags = args[nb_oargs + nb_iargs + 1];
 
     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
-    if (nb_regs > nb_params) {
-        nb_regs = nb_params;
+    if (nb_regs > nb_iargs) {
+        nb_regs = nb_iargs;
     }
 
     /* assign stack slots first */
-    call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
+    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
         ~(TCG_TARGET_STACK_ALIGN - 1);
     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
@@ -2286,7 +2125,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
     }
 
     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
-    for(i = nb_regs; i < nb_params; i++) {
+    for(i = nb_regs; i < nb_iargs; i++) {
         arg = args[nb_oargs + i];
 #ifdef TCG_TARGET_STACK_GROWSUP
         stack_offset -= sizeof(tcg_target_long);
@@ -2393,8 +2232,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
             }
         }
     }
-    
-    return nb_iargs + nb_oargs + def->nb_cargs + 1;
 }
 
 #ifdef CONFIG_PROFILER
@@ -2405,7 +2242,7 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 {
     int i;
 
-    for(i = INDEX_op_end; i < NB_OPS; i++) {
+    for (i = 0; i < NB_OPS; i++) {
         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                     tcg_table_op_count[i]);
     }
@@ -2422,10 +2259,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
                                       tcg_insn_unit *gen_code_buf,
                                       long search_pc)
 {
-    TCGOpcode opc;
-    int op_index;
-    const TCGOpDef *def;
-    const TCGArg *args;
+    int oi, oi_next;
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@@ -2440,8 +2274,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
 #endif
 
 #ifdef USE_TCG_OPTIMIZATIONS
-    s->gen_opparam_ptr =
-        tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
+    tcg_optimize(s);
 #endif
 
 #ifdef CONFIG_PROFILER
@@ -2470,42 +2303,30 @@ static inline int tcg_gen_code_common(TCGContext *s,
 
     tcg_out_tb_init(s);
 
-    args = s->gen_opparam_buf;
-    op_index = 0;
+    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+        TCGOp * const op = &s->gen_op_buf[oi];
+        TCGArg * const args = &s->gen_opparam_buf[op->args];
+        TCGOpcode opc = op->opc;
+        const TCGOpDef *def = &tcg_op_defs[opc];
+        uint16_t dead_args = s->op_dead_args[oi];
+        uint8_t sync_args = s->op_sync_args[oi];
 
-    for(;;) {
-        opc = s->gen_opc_buf[op_index];
+        oi_next = op->next;
 #ifdef CONFIG_PROFILER
         tcg_table_op_count[opc]++;
 #endif
-        def = &tcg_op_defs[opc];
-#if 0
-        printf("%s: %d %d %d\n", def->name,
-               def->nb_oargs, def->nb_iargs, def->nb_cargs);
-        //        dump_regs(s);
-#endif
-        switch(opc) {
+
+        switch (opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
-            tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
-                              s->op_sync_args[op_index]);
+            tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
             break;
         case INDEX_op_movi_i32:
         case INDEX_op_movi_i64:
-            tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
-                               s->op_sync_args[op_index]);
+            tcg_reg_alloc_movi(s, args, dead_args, sync_args);
             break;
         case INDEX_op_debug_insn_start:
-            /* debug instruction */
             break;
-        case INDEX_op_nop:
-        case INDEX_op_nop1:
-        case INDEX_op_nop2:
-        case INDEX_op_nop3:
-            break;
-        case INDEX_op_nopn:
-            args += args[0];
-            goto next;
         case INDEX_op_discard:
             temp_dead(s, args[0]);
             break;
@@ -2514,12 +2335,9 @@ static inline int tcg_gen_code_common(TCGContext *s,
             tcg_out_label(s, args[0], s->code_ptr);
             break;
         case INDEX_op_call:
-            args += tcg_reg_alloc_call(s, def, opc, args,
-                                       s->op_dead_args[op_index],
-                                       s->op_sync_args[op_index]);
-            goto next;
-        case INDEX_op_end:
-            goto the_end;
+            tcg_reg_alloc_call(s, op->callo, op->calli, args,
+                               dead_args, sync_args);
+            break;
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */
             if (def->flags & TCG_OPF_NOT_PRESENT) {
@@ -2528,21 +2346,17 @@ static inline int tcg_gen_code_common(TCGContext *s,
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
-                             s->op_sync_args[op_index]);
+            tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
             break;
         }
-        args += def->nb_args;
-    next:
         if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
-            return op_index;
+            return oi;
         }
-        op_index++;
 #ifndef NDEBUG
         check_regs(s);
 #endif
     }
- the_end:
+
     /* Generate TB finalization at the end of block */
     tcg_out_tb_finalize(s);
     return -1;
@@ -2553,14 +2367,18 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
 #ifdef CONFIG_PROFILER
     {
         int n;
-        n = (s->gen_opc_ptr - s->gen_opc_buf);
+
+        n = s->gen_last_op_idx + 1;
         s->op_count += n;
-        if (n > s->op_count_max)
+        if (n > s->op_count_max) {
             s->op_count_max = n;
+        }
 
-        s->temp_count += s->nb_temps;
-        if (s->nb_temps > s->temp_count_max)
-            s->temp_count_max = s->nb_temps;
+        n = s->nb_temps;
+        s->temp_count += n;
+        if (n > s->temp_count_max) {
+            s->temp_count_max = n;
+        }
     }
 #endif
 

+ 53 - 19
tcg/tcg.h

@@ -448,10 +448,28 @@ typedef struct TCGTempSet {
     unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
 } TCGTempSet;
 
+typedef struct TCGOp {
+    TCGOpcode opc   : 8;
+
+    /* The number of out and in parameters for a call.  */
+    unsigned callo  : 2;
+    unsigned calli  : 6;
+
+    /* Index of the arguments for this op, or -1 for zero-operand ops.  */
+    signed args     : 16;
+
+    /* Index of the prev/next op, or -1 for the end of the list.  */
+    signed prev     : 16;
+    signed next     : 16;
+} TCGOp;
+
+QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
+
 struct TCGContext {
     uint8_t *pool_cur, *pool_end;
     TCGPool *pool_first, *pool_current, *pool_first_large;
-    TCGLabel *labels;
     int nb_labels;
     int nb_globals;
     int nb_temps;
@@ -469,9 +487,6 @@ struct TCGContext {
                                corresponding output argument needs to be
                                sync to memory. */
     
-    /* tells in which temporary a given register is. It does not take
-       into account fixed registers */
-    int reg_to_temp[TCG_TARGET_NB_REGS];
     TCGRegSet reserved_regs;
     intptr_t current_frame_offset;
     intptr_t frame_start;
@@ -479,8 +494,6 @@ struct TCGContext {
     int frame_reg;
 
     tcg_insn_unit *code_ptr;
-    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
-    TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
 
     GHashTable *helpers;
 
@@ -508,14 +521,10 @@ struct TCGContext {
     int goto_tb_issue_mask;
 #endif
 
-    uint16_t gen_opc_buf[OPC_BUF_SIZE];
-    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
-
-    uint16_t *gen_opc_ptr;
-    TCGArg *gen_opparam_ptr;
-    target_ulong gen_opc_pc[OPC_BUF_SIZE];
-    uint16_t gen_opc_icount[OPC_BUF_SIZE];
-    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+    int gen_first_op_idx;
+    int gen_last_op_idx;
+    int gen_next_op_idx;
+    int gen_next_parm_idx;
 
     /* Code generation.  Note that we specifically do not use tcg_insn_unit
        here, because there's too much arithmetic throughout that relies
@@ -533,10 +542,38 @@ struct TCGContext {
 
     /* The TCGBackendData structure is private to tcg-target.c.  */
     struct TCGBackendData *be;
+
+    TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
+    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
+
+    /* tells in which temporary a given register is. It does not take
+       into account fixed registers */
+    int reg_to_temp[TCG_TARGET_NB_REGS];
+
+    TCGOp gen_op_buf[OPC_BUF_SIZE];
+    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+
+    target_ulong gen_opc_pc[OPC_BUF_SIZE];
+    uint16_t gen_opc_icount[OPC_BUF_SIZE];
+    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+
+    TCGLabel labels[TCG_MAX_LABELS];
 };
 
 extern TCGContext tcg_ctx;
 
+/* The number of opcodes emitted so far.  */
+static inline int tcg_op_buf_count(void)
+{
+    return tcg_ctx.gen_next_op_idx;
+}
+
+/* Test for whether to terminate the TB for using too many opcodes.  */
+static inline bool tcg_op_buf_full(void)
+{
+    return tcg_op_buf_count() >= OPC_MAX_SIZE;
+}
+
 /* pool based memory allocation */
 
 void *tcg_malloc_internal(TCGContext *s, int size);
@@ -706,11 +743,8 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
 void tcg_gen_callN(TCGContext *s, void *func,
                    TCGArg ret, int nargs, TCGArg *args);
 
-void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
-                        int c, int right, int arith);
-
-TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args,
-                     TCGOpDef *tcg_op_def);
+void tcg_op_remove(TCGContext *s, TCGOp *op);
+void tcg_optimize(TCGContext *s);
 
 /* only used for debugging purposes */
 void tcg_dump_ops(TCGContext *s);

+ 0 - 13
tci.c

@@ -506,19 +506,6 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
         tb_ptr += 2;
 
         switch (opc) {
-        case INDEX_op_end:
-        case INDEX_op_nop:
-            break;
-        case INDEX_op_nop1:
-        case INDEX_op_nop2:
-        case INDEX_op_nop3:
-        case INDEX_op_nopn:
-        case INDEX_op_discard:
-            TODO();
-            break;
-        case INDEX_op_set_label:
-            TODO();
-            break;
         case INDEX_op_call:
             t0 = tci_read_ri(&tb_ptr);
 #if TCG_TARGET_REG_BITS == 32

Kaikkia tiedostoja ei voida näyttää, sillä liian monta tiedostoa muuttui tässä diffissä