123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501 |
- /*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
- #include "qemu/osdep.h"
- #include "qemu/cutils.h"
- #include "migration/vmstate.h"
- #include "qapi/error.h"
- #include "qemu/error-report.h"
- #include "system/cpus.h"
- #include "system/qtest.h"
- #include "qemu/main-loop.h"
- #include "qemu/option.h"
- #include "qemu/seqlock.h"
- #include "system/replay.h"
- #include "system/runstate.h"
- #include "hw/core/cpu.h"
- #include "system/cpu-timers.h"
- #include "system/cpu-timers-internal.h"
- /*
- * ICOUNT: Instruction Counter
- *
- * This module is split off from cpu-timers because the icount part
- * is TCG-specific, and does not need to be built for other accels.
- */
- static bool icount_sleep = true;
- /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
- #define MAX_ICOUNT_SHIFT 10
- /* Do not count executed instructions */
- ICountMode use_icount = ICOUNT_DISABLED;
- static void icount_enable_precise(void)
- {
- /* Fixed conversion of insn to ns via "shift" option */
- use_icount = ICOUNT_PRECISE;
- }
- static void icount_enable_adaptive(void)
- {
- /* Runtime adaptive algorithm to compute shift */
- use_icount = ICOUNT_ADAPTATIVE;
- }
- /*
- * The current number of executed instructions is based on what we
- * originally budgeted minus the current state of the decrementing
- * icount counters in extra/u16.low.
- */
- static int64_t icount_get_executed(CPUState *cpu)
- {
- return (cpu->icount_budget -
- (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
- }
- /*
- * Update the global shared timer_state.qemu_icount to take into
- * account executed instructions. This is done by the TCG vCPU
- * thread so the main-loop can see time has moved forward.
- */
- static void icount_update_locked(CPUState *cpu)
- {
- int64_t executed = icount_get_executed(cpu);
- cpu->icount_budget -= executed;
- qatomic_set_i64(&timers_state.qemu_icount,
- timers_state.qemu_icount + executed);
- }
- /*
- * Update the global shared timer_state.qemu_icount to take into
- * account executed instructions. This is done by the TCG vCPU
- * thread so the main-loop can see time has moved forward.
- */
- void icount_update(CPUState *cpu)
- {
- seqlock_write_lock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- icount_update_locked(cpu);
- seqlock_write_unlock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- }
- static int64_t icount_get_raw_locked(void)
- {
- CPUState *cpu = current_cpu;
- if (cpu && cpu->running) {
- if (!cpu->neg.can_do_io) {
- error_report("Bad icount read");
- exit(1);
- }
- /* Take into account what has run */
- icount_update_locked(cpu);
- }
- /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
- return qatomic_read_i64(&timers_state.qemu_icount);
- }
- static int64_t icount_get_locked(void)
- {
- int64_t icount = icount_get_raw_locked();
- return qatomic_read_i64(&timers_state.qemu_icount_bias) +
- icount_to_ns(icount);
- }
- int64_t icount_get_raw(void)
- {
- int64_t icount;
- unsigned start;
- do {
- start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
- icount = icount_get_raw_locked();
- } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
- return icount;
- }
- /* Return the virtual CPU time, based on the instruction counter. */
- int64_t icount_get(void)
- {
- int64_t icount;
- unsigned start;
- do {
- start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
- icount = icount_get_locked();
- } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
- return icount;
- }
- int64_t icount_to_ns(int64_t icount)
- {
- return icount << qatomic_read(&timers_state.icount_time_shift);
- }
- /*
- * Correlation between real and virtual time is always going to be
- * fairly approximate, so ignore small variation.
- * When the guest is idle real and virtual time will be aligned in
- * the IO wait loop.
- */
- #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
- static void icount_adjust(void)
- {
- int64_t cur_time;
- int64_t cur_icount;
- int64_t delta;
- /* If the VM is not running, then do nothing. */
- if (!runstate_is_running()) {
- return;
- }
- seqlock_write_lock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
- cpu_get_clock_locked());
- cur_icount = icount_get_locked();
- delta = cur_icount - cur_time;
- /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
- if (delta > 0
- && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
- && timers_state.icount_time_shift > 0) {
- /* The guest is getting too far ahead. Slow time down. */
- qatomic_set(&timers_state.icount_time_shift,
- timers_state.icount_time_shift - 1);
- }
- if (delta < 0
- && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
- && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
- /* The guest is getting too far behind. Speed time up. */
- qatomic_set(&timers_state.icount_time_shift,
- timers_state.icount_time_shift + 1);
- }
- timers_state.last_delta = delta;
- qatomic_set_i64(&timers_state.qemu_icount_bias,
- cur_icount - (timers_state.qemu_icount
- << timers_state.icount_time_shift));
- seqlock_write_unlock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- }
- static void icount_adjust_rt(void *opaque)
- {
- timer_mod(timers_state.icount_rt_timer,
- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
- icount_adjust();
- }
- static void icount_adjust_vm(void *opaque)
- {
- timer_mod(timers_state.icount_vm_timer,
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- NANOSECONDS_PER_SECOND / 10);
- icount_adjust();
- }
- int64_t icount_round(int64_t count)
- {
- int shift = qatomic_read(&timers_state.icount_time_shift);
- return (count + (1 << shift) - 1) >> shift;
- }
- static void icount_warp_rt(void)
- {
- unsigned seq;
- int64_t warp_start;
- /*
- * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
- * changes from -1 to another value, so the race here is okay.
- */
- do {
- seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
- warp_start = timers_state.vm_clock_warp_start;
- } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
- if (warp_start == -1) {
- return;
- }
- seqlock_write_lock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- if (runstate_is_running()) {
- int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
- cpu_get_clock_locked());
- int64_t warp_delta;
- warp_delta = clock - timers_state.vm_clock_warp_start;
- if (icount_enabled() == ICOUNT_ADAPTATIVE) {
- /*
- * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
- * ahead of real time (it might already be ahead so careful not
- * to go backwards).
- */
- int64_t cur_icount = icount_get_locked();
- int64_t delta = clock - cur_icount;
- if (delta < 0) {
- delta = 0;
- }
- warp_delta = MIN(warp_delta, delta);
- }
- qatomic_set_i64(&timers_state.qemu_icount_bias,
- timers_state.qemu_icount_bias + warp_delta);
- }
- timers_state.vm_clock_warp_start = -1;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
- }
- }
/*
 * Callback of timers_state.icount_warp_timer: account the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * The timer already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT,
     * so no additional checkpoint is needed here.
     */
    icount_warp_rt();
}
- void icount_start_warp_timer(void)
- {
- int64_t clock;
- int64_t deadline;
- assert(icount_enabled());
- /*
- * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
- * do not fire, so computing the deadline does not make sense.
- */
- if (!runstate_is_running()) {
- return;
- }
- if (replay_mode != REPLAY_MODE_PLAY) {
- if (!all_cpu_threads_idle()) {
- return;
- }
- if (qtest_enabled()) {
- /* When testing, qtest commands advance icount. */
- return;
- }
- replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
- } else {
- /* warp clock deterministically in record/replay mode */
- if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
- /*
- * vCPU is sleeping and warp can't be started.
- * It is probably a race condition: notification sent
- * to vCPU was processed in advance and vCPU went to sleep.
- * Therefore we have to wake it up for doing something.
- */
- if (replay_has_event()) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
- }
- return;
- }
- }
- /* We want to use the earliest deadline from ALL vm_clocks */
- clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
- deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
- ~QEMU_TIMER_ATTR_EXTERNAL);
- if (deadline < 0) {
- if (!icount_sleep) {
- warn_report_once("icount sleep disabled and no active timers");
- }
- return;
- }
- if (deadline > 0) {
- /*
- * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
- * sleep. Otherwise, the CPU might be waiting for a future timer
- * interrupt to wake it up, but the interrupt never comes because
- * the vCPU isn't running any insns and thus doesn't advance the
- * QEMU_CLOCK_VIRTUAL.
- */
- if (!icount_sleep) {
- /*
- * We never let VCPUs sleep in no sleep icount mode.
- * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
- * to the next QEMU_CLOCK_VIRTUAL event and notify it.
- * It is useful when we want a deterministic execution time,
- * isolated from host latencies.
- */
- seqlock_write_lock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- qatomic_set_i64(&timers_state.qemu_icount_bias,
- timers_state.qemu_icount_bias + deadline);
- seqlock_write_unlock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
- } else {
- /*
- * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
- * "real" time, (related to the time left until the next event) has
- * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
- * This avoids that the warps are visible externally; for example,
- * you will not be sending network packets continuously instead of
- * every 100ms.
- */
- seqlock_write_lock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- if (timers_state.vm_clock_warp_start == -1
- || timers_state.vm_clock_warp_start > clock) {
- timers_state.vm_clock_warp_start = clock;
- }
- seqlock_write_unlock(&timers_state.vm_clock_seqlock,
- &timers_state.vm_clock_lock);
- timer_mod_anticipate(timers_state.icount_warp_timer,
- clock + deadline);
- }
- } else if (deadline == 0) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
- }
- }
- void icount_account_warp_timer(void)
- {
- if (!icount_sleep) {
- return;
- }
- /*
- * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
- * do not fire, so computing the deadline does not make sense.
- */
- if (!runstate_is_running()) {
- return;
- }
- replay_async_events();
- /* warp clock deterministically in record/replay mode */
- if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
- return;
- }
- timer_del(timers_state.icount_warp_timer);
- icount_warp_rt();
- }
- bool icount_configure(QemuOpts *opts, Error **errp)
- {
- const char *option = qemu_opt_get(opts, "shift");
- bool sleep = qemu_opt_get_bool(opts, "sleep", true);
- bool align = qemu_opt_get_bool(opts, "align", false);
- long time_shift = -1;
- if (!option) {
- if (qemu_opt_get(opts, "align") != NULL) {
- error_setg(errp, "Please specify shift option when using align");
- return false;
- }
- return true;
- }
- if (align && !sleep) {
- error_setg(errp, "align=on and sleep=off are incompatible");
- return false;
- }
- if (strcmp(option, "auto") != 0) {
- if (qemu_strtol(option, NULL, 0, &time_shift) < 0
- || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
- error_setg(errp, "icount: Invalid shift value");
- return false;
- }
- } else if (icount_align_option) {
- error_setg(errp, "shift=auto and align=on are incompatible");
- return false;
- } else if (!icount_sleep) {
- error_setg(errp, "shift=auto and sleep=off are incompatible");
- return false;
- }
- icount_sleep = sleep;
- if (icount_sleep) {
- timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
- icount_timer_cb, NULL);
- }
- icount_align_option = align;
- if (time_shift >= 0) {
- timers_state.icount_time_shift = time_shift;
- icount_enable_precise();
- return true;
- }
- icount_enable_adaptive();
- /*
- * 125MIPS seems a reasonable initial guess at the guest speed.
- * It will be corrected fairly quickly anyway.
- */
- timers_state.icount_time_shift = 3;
- /*
- * Have both realtime and virtual time triggers for speed adjustment.
- * The realtime trigger catches emulated time passing too slowly,
- * the virtual time trigger catches emulated time passing too fast.
- * Realtime triggers occur even when idle, so use them less frequently
- * than VM triggers.
- */
- timers_state.vm_clock_warp_start = -1;
- timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
- icount_adjust_rt, NULL);
- timer_mod(timers_state.icount_rt_timer,
- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
- timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
- icount_adjust_vm, NULL);
- timer_mod(timers_state.icount_vm_timer,
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- NANOSECONDS_PER_SECOND / 10);
- return true;
- }
- void icount_notify_exit(void)
- {
- assert(icount_enabled());
- if (current_cpu) {
- qemu_cpu_kick(current_cpu);
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
- }
- }
|