123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665 |
- /*
- * Host code generation
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- #include "qemu/osdep.h"
- #include "trace.h"
- #include "disas/disas.h"
- #include "exec/exec-all.h"
- #include "tcg/tcg.h"
- #if defined(CONFIG_USER_ONLY)
- #include "qemu.h"
- #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
- #include <sys/param.h>
- #if __FreeBSD_version >= 700104
- #define HAVE_KINFO_GETVMMAP
- #define sigqueue sigqueue_freebsd /* avoid redefinition */
- #include <sys/proc.h>
- #include <machine/profile.h>
- #define _KERNEL
- #include <sys/user.h>
- #undef _KERNEL
- #undef sigqueue
- #include <libutil.h>
- #endif
- #endif
- #else
- #include "exec/ram_addr.h"
- #endif
- #include "exec/cputlb.h"
- #include "exec/translate-all.h"
- #include "exec/translator.h"
- #include "exec/tb-flush.h"
- #include "qemu/bitmap.h"
- #include "qemu/qemu-print.h"
- #include "qemu/main-loop.h"
- #include "qemu/cacheinfo.h"
- #include "qemu/timer.h"
- #include "exec/log.h"
- #include "system/cpus.h"
- #include "system/cpu-timers.h"
- #include "system/tcg.h"
- #include "qapi/error.h"
- #include "hw/core/tcg-cpu-ops.h"
- #include "tb-jmp-cache.h"
- #include "tb-hash.h"
- #include "tb-context.h"
- #include "internal-common.h"
- #include "internal-target.h"
- #include "tcg/perf.h"
- #include "tcg/insn-start-words.h"
- TBContext tb_ctx; /* File-scope TBContext object; nothing in this section of the file accesses its fields. NOTE(review): presumably used by other TCG translation units -- confirm. */
/*
 * Store VAL at P in signed LEB128 form: seven payload bits per byte,
 * least significant group first, with bit 7 set on every byte except
 * the last one.  Returns the address just past the final byte written.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int group = val & 0x7f;
        int sign_bit = group & 0x40;

        val >>= 7;

        /*
         * The sequence is complete once what is left of VAL is nothing
         * but the sign extension of the group just extracted.
         */
        if ((val == 0 && !sign_bit) || (val == -1 && sign_bit)) {
            *p++ = group;
            return p;
        }

        /* More groups follow: flag this byte as a continuation. */
        *p++ = group | 0x80;
    }
}
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The input must be a well-formed sequence as produced by
 * encode_sleb128(), i.e. at most ten bytes for a 64-bit value.
 *
 * The result is sign-extended to the full 64-bit width of the return
 * type, independent of the guest word size, so that decoding is the
 * exact inverse of encode_sleb128() for every int64_t value.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    /* Accumulate as unsigned so that every shift below is well defined. */
    uint64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (uint64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);

    /*
     * Bit 6 of the final byte is the sign.  Extend it through bit 63
     * unless the payload already covered all 64 bits.
     */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
- /* Encode the data collected about the instructions while compiling TB.
- Place the data at BLOCK, and return the number of bytes consumed.
- The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
- which come from the target's insn_start data, followed by a uintptr_t
- which comes from the host pc of the end of the code implementing the insn.
- Each line of the table is encoded as sleb128 deltas from the previous
- line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
- That is, the first column is seeded with the guest pc, the last column
- with the host pc, and the middle columns with zeros. */
- static int encode_search(TranslationBlock *tb, uint8_t *block)
- {
- uint8_t *highwater = tcg_ctx->code_gen_highwater;
- uint64_t *insn_data = tcg_ctx->gen_insn_data;
- uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
- uint8_t *p = block;
- int i, j, n;
- for (i = 0, n = tb->icount; i < n; ++i) {
- uint64_t prev, curr;
- for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
- if (i == 0) {
- prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
- } else {
- prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
- }
- curr = insn_data[i * TARGET_INSN_START_WORDS + j];
- p = encode_sleb128(p, curr - prev);
- }
- prev = (i == 0 ? 0 : insn_end_off[i - 1]);
- curr = insn_end_off[i];
- p = encode_sleb128(p, curr - prev);
- /* Test for (pending) buffer overflow. The assumption is that any
- one row beginning below the high water mark cannot overrun
- the buffer completely. Thus we can test for overflow after
- encoding a row without having to check during encoding. */
- if (unlikely(p > highwater)) {
- return -1;
- }
- }
- return p - block;
- }
- static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
- uint64_t *data)
- {
- uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
- const uint8_t *p = tb->tc.ptr + tb->tc.size;
- int i, j, num_insns = tb->icount;
- host_pc -= GETPC_ADJ;
- if (host_pc < iter_pc) {
- return -1;
- }
- memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
- if (!(tb_cflags(tb) & CF_PCREL)) {
- data[0] = tb->pc;
- }
- /*
- * Reconstruct the stored insn data while looking for the point
- * at which the end of the insn exceeds host_pc.
- */
- for (i = 0; i < num_insns; ++i) {
- for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
- data[j] += decode_sleb128(&p);
- }
- iter_pc += decode_sleb128(&p);
- if (iter_pc > host_pc) {
- return num_insns - i;
- }
- }
- return -1;
- }
- /*
- * The cpu state corresponding to 'host_pc' is restored in
- * preparation for exiting the TB.
- */
- void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
- uintptr_t host_pc)
- {
- uint64_t data[TARGET_INSN_START_WORDS];
- int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
- if (insns_left < 0) {
- return;
- }
- if (tb_cflags(tb) & CF_USE_ICOUNT) {
- assert(icount_enabled());
- /*
- * Reset the cycle counter to the start of the block and
- * shift if to the number of actually executed instructions.
- */
- cpu->neg.icount_decr.u16.low += insns_left;
- }
- cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
- }
- bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
- {
- /*
- * The host_pc has to be in the rx region of the code buffer.
- * If it is not we will not be able to resolve it here.
- * The two cases where host_pc will not be correct are:
- *
- * - fault during translation (instruction fetch)
- * - fault from helper (not using GETPC() macro)
- *
- * Either way we need return early as we can't resolve it here.
- */
- if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
- TranslationBlock *tb = tcg_tb_lookup(host_pc);
- if (tb) {
- cpu_restore_state_from_tb(cpu, tb, host_pc);
- return true;
- }
- }
- return false;
- }
- bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
- {
- if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
- TranslationBlock *tb = tcg_tb_lookup(host_pc);
- if (tb) {
- return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
- }
- }
- return false;
- }
/*
 * Page-level initialisation entry point.  Its only action is to call
 * page_table_config_init().
 */
void page_init(void)
{
    page_table_config_init();
}
- /*
- * Isolate the portion of code gen which can setjmp/longjmp.
- * Return the size of the generated code, or negative on error.
- */
- static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
- vaddr pc, void *host_pc,
- int *max_insns, int64_t *ti)
- {
- int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
- if (unlikely(ret != 0)) {
- return ret;
- }
- tcg_func_start(tcg_ctx);
- tcg_ctx->cpu = env_cpu(env);
- gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
- assert(tb->size != 0);
- tcg_ctx->cpu = NULL;
- *max_insns = tb->icount;
- return tcg_gen_code(tcg_ctx, tb, pc);
- }
- /* Called with mmap_lock held for user mode emulation. */
- TranslationBlock *tb_gen_code(CPUState *cpu,
- vaddr pc, uint64_t cs_base,
- uint32_t flags, int cflags)
- {
- CPUArchState *env = cpu_env(cpu);
- TranslationBlock *tb, *existing_tb;
- tb_page_addr_t phys_pc, phys_p2;
- tcg_insn_unit *gen_code_buf;
- int gen_code_size, search_size, max_insns;
- int64_t ti;
- void *host_pc;
- assert_memory_lock();
- qemu_thread_jit_write();
- phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
- if (phys_pc == -1) {
- /* Generate a one-shot TB with 1 insn in it */
- cflags = (cflags & ~CF_COUNT_MASK) | 1;
- }
- max_insns = cflags & CF_COUNT_MASK;
- if (max_insns == 0) {
- max_insns = TCG_MAX_INSNS;
- }
- QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
- buffer_overflow:
- assert_no_pages_locked();
- tb = tcg_tb_alloc(tcg_ctx);
- if (unlikely(!tb)) {
- /* flush must be done */
- tb_flush(cpu);
- mmap_unlock();
- /* Make the execution loop process the flush as soon as possible. */
- cpu->exception_index = EXCP_INTERRUPT;
- cpu_loop_exit(cpu);
- }
- gen_code_buf = tcg_ctx->code_gen_ptr;
- tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
- if (!(cflags & CF_PCREL)) {
- tb->pc = pc;
- }
- tb->cs_base = cs_base;
- tb->flags = flags;
- tb->cflags = cflags;
- tb_set_page_addr0(tb, phys_pc);
- tb_set_page_addr1(tb, -1);
- if (phys_pc != -1) {
- tb_lock_page0(phys_pc);
- }
- tcg_ctx->gen_tb = tb;
- tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
- #ifdef CONFIG_SOFTMMU
- tcg_ctx->page_bits = TARGET_PAGE_BITS;
- tcg_ctx->page_mask = TARGET_PAGE_MASK;
- tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
- #endif
- tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
- #ifdef TCG_GUEST_DEFAULT_MO
- tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
- #else
- tcg_ctx->guest_mo = TCG_MO_ALL;
- #endif
- restart_translate:
- trace_translate_block(tb, pc, tb->tc.ptr);
- gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
- if (unlikely(gen_code_size < 0)) {
- switch (gen_code_size) {
- case -1:
- /*
- * Overflow of code_gen_buffer, or the current slice of it.
- *
- * TODO: We don't need to re-do gen_intermediate_code, nor
- * should we re-do the tcg optimization currently hidden
- * inside tcg_gen_code. All that should be required is to
- * flush the TBs, allocate a new TB, re-initialize it per
- * above, and re-do the actual code generation.
- */
- qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
- "Restarting code generation for "
- "code_gen_buffer overflow\n");
- tb_unlock_pages(tb);
- tcg_ctx->gen_tb = NULL;
- goto buffer_overflow;
- case -2:
- /*
- * The code generated for the TranslationBlock is too large.
- * The maximum size allowed by the unwind info is 64k.
- * There may be stricter constraints from relocations
- * in the tcg backend.
- *
- * Try again with half as many insns as we attempted this time.
- * If a single insn overflows, there's a bug somewhere...
- */
- assert(max_insns > 1);
- max_insns /= 2;
- qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
- "Restarting code generation with "
- "smaller translation block (max %d insns)\n",
- max_insns);
- /*
- * The half-sized TB may not cross pages.
- * TODO: Fix all targets that cross pages except with
- * the first insn, at which point this can't be reached.
- */
- phys_p2 = tb_page_addr1(tb);
- if (unlikely(phys_p2 != -1)) {
- tb_unlock_page1(phys_pc, phys_p2);
- tb_set_page_addr1(tb, -1);
- }
- goto restart_translate;
- case -3:
- /*
- * We had a page lock ordering problem. In order to avoid
- * deadlock we had to drop the lock on page0, which means
- * that everything we translated so far is compromised.
- * Restart with locks held on both pages.
- */
- qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
- "Restarting code generation with re-locked pages");
- goto restart_translate;
- default:
- g_assert_not_reached();
- }
- }
- tcg_ctx->gen_tb = NULL;
- search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
- if (unlikely(search_size < 0)) {
- tb_unlock_pages(tb);
- goto buffer_overflow;
- }
- tb->tc.size = gen_code_size;
- /*
- * For CF_PCREL, attribute all executions of the generated code
- * to its first mapping.
- */
- perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
- if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
- qemu_log_in_addr_range(pc)) {
- FILE *logfile = qemu_log_trylock();
- if (logfile) {
- int code_size, data_size;
- const tcg_target_ulong *rx_data_gen_ptr;
- size_t chunk_start;
- int insn = 0;
- if (tcg_ctx->data_gen_ptr) {
- rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
- code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
- data_size = gen_code_size - code_size;
- } else {
- rx_data_gen_ptr = 0;
- code_size = gen_code_size;
- data_size = 0;
- }
- /* Dump header and the first instruction */
- fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
- fprintf(logfile,
- " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
- tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
- chunk_start = tcg_ctx->gen_insn_end_off[insn];
- disas(logfile, tb->tc.ptr, chunk_start);
- /*
- * Dump each instruction chunk, wrapping up empty chunks into
- * the next instruction. The whole array is offset so the
- * first entry is the beginning of the 2nd instruction.
- */
- while (insn < tb->icount) {
- size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
- if (chunk_end > chunk_start) {
- fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
- tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
- disas(logfile, tb->tc.ptr + chunk_start,
- chunk_end - chunk_start);
- chunk_start = chunk_end;
- }
- insn++;
- }
- if (chunk_start < code_size) {
- fprintf(logfile, " -- tb slow paths + alignment\n");
- disas(logfile, tb->tc.ptr + chunk_start,
- code_size - chunk_start);
- }
- /* Finally dump any data we may have after the block */
- if (data_size) {
- int i;
- fprintf(logfile, " data: [size=%d]\n", data_size);
- for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
- if (sizeof(tcg_target_ulong) == 8) {
- fprintf(logfile,
- "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
- (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
- } else if (sizeof(tcg_target_ulong) == 4) {
- fprintf(logfile,
- "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
- (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
- } else {
- qemu_build_not_reached();
- }
- }
- }
- fprintf(logfile, "\n");
- qemu_log_unlock(logfile);
- }
- }
- qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
- ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
- CODE_GEN_ALIGN));
- /* init jump list */
- qemu_spin_init(&tb->jmp_lock);
- tb->jmp_list_head = (uintptr_t)NULL;
- tb->jmp_list_next[0] = (uintptr_t)NULL;
- tb->jmp_list_next[1] = (uintptr_t)NULL;
- tb->jmp_dest[0] = (uintptr_t)NULL;
- tb->jmp_dest[1] = (uintptr_t)NULL;
- /* init original jump addresses which have been set during tcg_gen_code() */
- if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
- tb_reset_jump(tb, 0);
- }
- if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
- tb_reset_jump(tb, 1);
- }
- /*
- * If the TB is not associated with a physical RAM page then it must be
- * a temporary one-insn TB, and we have nothing left to do. Return early
- * before attempting to link to other TBs or add to the lookup table.
- */
- if (tb_page_addr0(tb) == -1) {
- assert_no_pages_locked();
- return tb;
- }
- /*
- * Insert TB into the corresponding region tree before publishing it
- * through QHT. Otherwise rewinding happened in the TB might fail to
- * lookup itself using host PC.
- */
- tcg_tb_insert(tb);
- /*
- * No explicit memory barrier is required -- tb_link_page() makes the
- * TB visible in a consistent state.
- */
- existing_tb = tb_link_page(tb);
- assert_no_pages_locked();
- /* if the TB already exists, discard what we just translated */
- if (unlikely(existing_tb != tb)) {
- uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
- orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
- qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
- tcg_tb_remove(tb);
- return existing_tb;
- }
- return tb;
- }
- /* user-mode: call with mmap_lock held */
- void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
- {
- TranslationBlock *tb;
- assert_memory_lock();
- tb = tcg_tb_lookup(retaddr);
- if (tb) {
- /* We can use retranslation to find the PC. */
- cpu_restore_state_from_tb(cpu, tb, retaddr);
- tb_phys_invalidate(tb, -1);
- } else {
- /* The exception probably happened in a helper. The CPU state should
- have been saved before calling it. Fetch the PC from there. */
- CPUArchState *env = cpu_env(cpu);
- vaddr pc;
- uint64_t cs_base;
- tb_page_addr_t addr;
- uint32_t flags;
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
- addr = get_page_addr_code(env, pc);
- if (addr != -1) {
- tb_invalidate_phys_range(addr, addr);
- }
- }
- }
- #ifndef CONFIG_USER_ONLY
- /*
- * In deterministic execution mode, instructions doing device I/Os
- * must be at the end of the TB.
- *
- * Called by softmmu_template.h, with iothread mutex not held.
- */
- void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
- {
- TranslationBlock *tb;
- CPUClass *cc;
- uint32_t n;
- tb = tcg_tb_lookup(retaddr);
- if (!tb) {
- cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
- (void *)retaddr);
- }
- cpu_restore_state_from_tb(cpu, tb, retaddr);
- /*
- * Some guests must re-execute the branch when re-executing a delay
- * slot instruction. When this is the case, adjust icount and N
- * to account for the re-execution of the branch.
- */
- n = 1;
- cc = CPU_GET_CLASS(cpu);
- if (cc->tcg_ops->io_recompile_replay_branch &&
- cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
- cpu->neg.icount_decr.u16.low++;
- n = 2;
- }
- /*
- * Exit the loop and potentially generate a new TB executing the
- * just the I/O insns. We also limit instrumentation to memory
- * operations only (which execute after completion) so we don't
- * double instrument the instruction.
- */
- cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | n;
- if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
- vaddr pc = cpu->cc->get_pc(cpu);
- if (qemu_log_in_addr_range(pc)) {
- qemu_log("cpu_io_recompile: rewound execution of TB to %016"
- VADDR_PRIx "\n", pc);
- }
- }
- cpu_loop_exit_noexc(cpu);
- }
- #endif /* CONFIG_USER_ONLY */
- /*
- * Called by generic code at e.g. cpu reset after cpu creation,
- * therefore we must be prepared to allocate the jump cache.
- */
- void tcg_flush_jmp_cache(CPUState *cpu)
- {
- CPUJumpCache *jc = cpu->tb_jmp_cache;
- /* During early initialization, the cache may not yet be allocated. */
- if (unlikely(jc == NULL)) {
- return;
- }
- for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
- qatomic_set(&jc->array[i].tb, NULL);
- }
- }
|