icount-common.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "qemu/cutils.h"
  26. #include "migration/vmstate.h"
  27. #include "qapi/error.h"
  28. #include "qemu/error-report.h"
  29. #include "system/cpus.h"
  30. #include "system/qtest.h"
  31. #include "qemu/main-loop.h"
  32. #include "qemu/option.h"
  33. #include "qemu/seqlock.h"
  34. #include "system/replay.h"
  35. #include "system/runstate.h"
  36. #include "hw/core/cpu.h"
  37. #include "system/cpu-timers.h"
  38. #include "system/cpu-timers-internal.h"
  39. /*
  40. * ICOUNT: Instruction Counter
  41. *
  42. * this module is split off from cpu-timers because the icount part
  43. * is TCG-specific, and does not need to be built for other accels.
  44. */
  45. static bool icount_sleep = true;
  46. /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
  47. #define MAX_ICOUNT_SHIFT 10
  48. bool icount_align_option;
  49. /* Do not count executed instructions */
  50. ICountMode use_icount = ICOUNT_DISABLED;
  51. static void icount_enable_precise(void)
  52. {
  53. /* Fixed conversion of insn to ns via "shift" option */
  54. use_icount = ICOUNT_PRECISE;
  55. }
  56. static void icount_enable_adaptive(void)
  57. {
  58. /* Runtime adaptive algorithm to compute shift */
  59. use_icount = ICOUNT_ADAPTATIVE;
  60. }
  61. /*
  62. * The current number of executed instructions is based on what we
  63. * originally budgeted minus the current state of the decrementing
  64. * icount counters in extra/u16.low.
  65. */
  66. static int64_t icount_get_executed(CPUState *cpu)
  67. {
  68. return (cpu->icount_budget -
  69. (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
  70. }
  71. /*
  72. * Update the global shared timer_state.qemu_icount to take into
  73. * account executed instructions. This is done by the TCG vCPU
  74. * thread so the main-loop can see time has moved forward.
  75. */
  76. static void icount_update_locked(CPUState *cpu)
  77. {
  78. int64_t executed = icount_get_executed(cpu);
  79. cpu->icount_budget -= executed;
  80. qatomic_set_i64(&timers_state.qemu_icount,
  81. timers_state.qemu_icount + executed);
  82. }
  83. /*
  84. * Update the global shared timer_state.qemu_icount to take into
  85. * account executed instructions. This is done by the TCG vCPU
  86. * thread so the main-loop can see time has moved forward.
  87. */
  88. void icount_update(CPUState *cpu)
  89. {
  90. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  91. &timers_state.vm_clock_lock);
  92. icount_update_locked(cpu);
  93. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  94. &timers_state.vm_clock_lock);
  95. }
  96. static int64_t icount_get_raw_locked(void)
  97. {
  98. CPUState *cpu = current_cpu;
  99. if (cpu && cpu->running) {
  100. if (!cpu->neg.can_do_io) {
  101. error_report("Bad icount read");
  102. exit(1);
  103. }
  104. /* Take into account what has run */
  105. icount_update_locked(cpu);
  106. }
  107. /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
  108. return qatomic_read_i64(&timers_state.qemu_icount);
  109. }
  110. static int64_t icount_get_locked(void)
  111. {
  112. int64_t icount = icount_get_raw_locked();
  113. return qatomic_read_i64(&timers_state.qemu_icount_bias) +
  114. icount_to_ns(icount);
  115. }
  116. int64_t icount_get_raw(void)
  117. {
  118. int64_t icount;
  119. unsigned start;
  120. do {
  121. start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  122. icount = icount_get_raw_locked();
  123. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
  124. return icount;
  125. }
  126. /* Return the virtual CPU time, based on the instruction counter. */
  127. int64_t icount_get(void)
  128. {
  129. int64_t icount;
  130. unsigned start;
  131. do {
  132. start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  133. icount = icount_get_locked();
  134. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
  135. return icount;
  136. }
  137. int64_t icount_to_ns(int64_t icount)
  138. {
  139. return icount << qatomic_read(&timers_state.icount_time_shift);
  140. }
  141. /*
  142. * Correlation between real and virtual time is always going to be
  143. * fairly approximate, so ignore small variation.
  144. * When the guest is idle real and virtual time will be aligned in
  145. * the IO wait loop.
  146. */
  147. #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
  148. static void icount_adjust(void)
  149. {
  150. int64_t cur_time;
  151. int64_t cur_icount;
  152. int64_t delta;
  153. /* If the VM is not running, then do nothing. */
  154. if (!runstate_is_running()) {
  155. return;
  156. }
  157. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  158. &timers_state.vm_clock_lock);
  159. cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
  160. cpu_get_clock_locked());
  161. cur_icount = icount_get_locked();
  162. delta = cur_icount - cur_time;
  163. /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
  164. if (delta > 0
  165. && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
  166. && timers_state.icount_time_shift > 0) {
  167. /* The guest is getting too far ahead. Slow time down. */
  168. qatomic_set(&timers_state.icount_time_shift,
  169. timers_state.icount_time_shift - 1);
  170. }
  171. if (delta < 0
  172. && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
  173. && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
  174. /* The guest is getting too far behind. Speed time up. */
  175. qatomic_set(&timers_state.icount_time_shift,
  176. timers_state.icount_time_shift + 1);
  177. }
  178. timers_state.last_delta = delta;
  179. qatomic_set_i64(&timers_state.qemu_icount_bias,
  180. cur_icount - (timers_state.qemu_icount
  181. << timers_state.icount_time_shift));
  182. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  183. &timers_state.vm_clock_lock);
  184. }
  185. static void icount_adjust_rt(void *opaque)
  186. {
  187. timer_mod(timers_state.icount_rt_timer,
  188. qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
  189. icount_adjust();
  190. }
  191. static void icount_adjust_vm(void *opaque)
  192. {
  193. timer_mod(timers_state.icount_vm_timer,
  194. qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  195. NANOSECONDS_PER_SECOND / 10);
  196. icount_adjust();
  197. }
  198. int64_t icount_round(int64_t count)
  199. {
  200. int shift = qatomic_read(&timers_state.icount_time_shift);
  201. return (count + (1 << shift) - 1) >> shift;
  202. }
  203. static void icount_warp_rt(void)
  204. {
  205. unsigned seq;
  206. int64_t warp_start;
  207. /*
  208. * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
  209. * changes from -1 to another value, so the race here is okay.
  210. */
  211. do {
  212. seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  213. warp_start = timers_state.vm_clock_warp_start;
  214. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
  215. if (warp_start == -1) {
  216. return;
  217. }
  218. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  219. &timers_state.vm_clock_lock);
  220. if (runstate_is_running()) {
  221. int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
  222. cpu_get_clock_locked());
  223. int64_t warp_delta;
  224. warp_delta = clock - timers_state.vm_clock_warp_start;
  225. if (icount_enabled() == ICOUNT_ADAPTATIVE) {
  226. /*
  227. * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
  228. * ahead of real time (it might already be ahead so careful not
  229. * to go backwards).
  230. */
  231. int64_t cur_icount = icount_get_locked();
  232. int64_t delta = clock - cur_icount;
  233. if (delta < 0) {
  234. delta = 0;
  235. }
  236. warp_delta = MIN(warp_delta, delta);
  237. }
  238. qatomic_set_i64(&timers_state.qemu_icount_bias,
  239. timers_state.qemu_icount_bias + warp_delta);
  240. }
  241. timers_state.vm_clock_warp_start = -1;
  242. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  243. &timers_state.vm_clock_lock);
  244. if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
  245. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  246. }
  247. }
  /*
   * Callback of timers_state.icount_warp_timer: complete the pending
   * warp.  @opaque is unused.
   *
   * No need for a checkpoint because the timer already synchronizes
   * with CHECKPOINT_CLOCK_VIRTUAL_RT.
   */
  static void icount_timer_cb(void *opaque)
  {
      icount_warp_rt();
  }
  256. void icount_start_warp_timer(void)
  257. {
  258. int64_t clock;
  259. int64_t deadline;
  260. assert(icount_enabled());
  261. /*
  262. * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
  263. * do not fire, so computing the deadline does not make sense.
  264. */
  265. if (!runstate_is_running()) {
  266. return;
  267. }
  268. if (replay_mode != REPLAY_MODE_PLAY) {
  269. if (!all_cpu_threads_idle()) {
  270. return;
  271. }
  272. if (qtest_enabled()) {
  273. /* When testing, qtest commands advance icount. */
  274. return;
  275. }
  276. replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
  277. } else {
  278. /* warp clock deterministically in record/replay mode */
  279. if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
  280. /*
  281. * vCPU is sleeping and warp can't be started.
  282. * It is probably a race condition: notification sent
  283. * to vCPU was processed in advance and vCPU went to sleep.
  284. * Therefore we have to wake it up for doing something.
  285. */
  286. if (replay_has_event()) {
  287. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  288. }
  289. return;
  290. }
  291. }
  292. /* We want to use the earliest deadline from ALL vm_clocks */
  293. clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
  294. deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
  295. ~QEMU_TIMER_ATTR_EXTERNAL);
  296. if (deadline < 0) {
  297. if (!icount_sleep) {
  298. warn_report_once("icount sleep disabled and no active timers");
  299. }
  300. return;
  301. }
  302. if (deadline > 0) {
  303. /*
  304. * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
  305. * sleep. Otherwise, the CPU might be waiting for a future timer
  306. * interrupt to wake it up, but the interrupt never comes because
  307. * the vCPU isn't running any insns and thus doesn't advance the
  308. * QEMU_CLOCK_VIRTUAL.
  309. */
  310. if (!icount_sleep) {
  311. /*
  312. * We never let VCPUs sleep in no sleep icount mode.
  313. * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
  314. * to the next QEMU_CLOCK_VIRTUAL event and notify it.
  315. * It is useful when we want a deterministic execution time,
  316. * isolated from host latencies.
  317. */
  318. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  319. &timers_state.vm_clock_lock);
  320. qatomic_set_i64(&timers_state.qemu_icount_bias,
  321. timers_state.qemu_icount_bias + deadline);
  322. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  323. &timers_state.vm_clock_lock);
  324. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  325. } else {
  326. /*
  327. * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
  328. * "real" time, (related to the time left until the next event) has
  329. * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
  330. * This avoids that the warps are visible externally; for example,
  331. * you will not be sending network packets continuously instead of
  332. * every 100ms.
  333. */
  334. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  335. &timers_state.vm_clock_lock);
  336. if (timers_state.vm_clock_warp_start == -1
  337. || timers_state.vm_clock_warp_start > clock) {
  338. timers_state.vm_clock_warp_start = clock;
  339. }
  340. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  341. &timers_state.vm_clock_lock);
  342. timer_mod_anticipate(timers_state.icount_warp_timer,
  343. clock + deadline);
  344. }
  345. } else if (deadline == 0) {
  346. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  347. }
  348. }
  349. void icount_account_warp_timer(void)
  350. {
  351. if (!icount_sleep) {
  352. return;
  353. }
  354. /*
  355. * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
  356. * do not fire, so computing the deadline does not make sense.
  357. */
  358. if (!runstate_is_running()) {
  359. return;
  360. }
  361. replay_async_events();
  362. /* warp clock deterministically in record/replay mode */
  363. if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
  364. return;
  365. }
  366. timer_del(timers_state.icount_warp_timer);
  367. icount_warp_rt();
  368. }
  369. bool icount_configure(QemuOpts *opts, Error **errp)
  370. {
  371. const char *option = qemu_opt_get(opts, "shift");
  372. bool sleep = qemu_opt_get_bool(opts, "sleep", true);
  373. bool align = qemu_opt_get_bool(opts, "align", false);
  374. long time_shift = -1;
  375. if (!option) {
  376. if (qemu_opt_get(opts, "align") != NULL) {
  377. error_setg(errp, "Please specify shift option when using align");
  378. return false;
  379. }
  380. return true;
  381. }
  382. if (align && !sleep) {
  383. error_setg(errp, "align=on and sleep=off are incompatible");
  384. return false;
  385. }
  386. if (strcmp(option, "auto") != 0) {
  387. if (qemu_strtol(option, NULL, 0, &time_shift) < 0
  388. || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
  389. error_setg(errp, "icount: Invalid shift value");
  390. return false;
  391. }
  392. } else if (icount_align_option) {
  393. error_setg(errp, "shift=auto and align=on are incompatible");
  394. return false;
  395. } else if (!icount_sleep) {
  396. error_setg(errp, "shift=auto and sleep=off are incompatible");
  397. return false;
  398. }
  399. icount_sleep = sleep;
  400. if (icount_sleep) {
  401. timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
  402. icount_timer_cb, NULL);
  403. }
  404. icount_align_option = align;
  405. if (time_shift >= 0) {
  406. timers_state.icount_time_shift = time_shift;
  407. icount_enable_precise();
  408. return true;
  409. }
  410. icount_enable_adaptive();
  411. /*
  412. * 125MIPS seems a reasonable initial guess at the guest speed.
  413. * It will be corrected fairly quickly anyway.
  414. */
  415. timers_state.icount_time_shift = 3;
  416. /*
  417. * Have both realtime and virtual time triggers for speed adjustment.
  418. * The realtime trigger catches emulated time passing too slowly,
  419. * the virtual time trigger catches emulated time passing too fast.
  420. * Realtime triggers occur even when idle, so use them less frequently
  421. * than VM triggers.
  422. */
  423. timers_state.vm_clock_warp_start = -1;
  424. timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
  425. icount_adjust_rt, NULL);
  426. timer_mod(timers_state.icount_rt_timer,
  427. qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
  428. timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
  429. icount_adjust_vm, NULL);
  430. timer_mod(timers_state.icount_vm_timer,
  431. qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  432. NANOSECONDS_PER_SECOND / 10);
  433. return true;
  434. }
  435. void icount_notify_exit(void)
  436. {
  437. assert(icount_enabled());
  438. if (current_cpu) {
  439. qemu_cpu_kick(current_cpu);
  440. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  441. }
  442. }