/*
 * QEMU KVM support, paravirtual clock device
 *
 * Copyright (C) 2011 Siemens AG
 *
 * Authors:
 *  Jan Kiszka        <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL version 2.
 * See the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "sysemu/hw_accel.h"
#include "kvm/kvm_i386.h"
#include "migration/vmstate.h"
#include "hw/sysbus.h"
#include "hw/i386/kvm/clock.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"

#include <linux/kvm.h>
#include "standard-headers/asm-x86/kvm_para.h"
#include "qom/object.h"

  30. #define TYPE_KVM_CLOCK "kvmclock"
  31. OBJECT_DECLARE_SIMPLE_TYPE(KVMClockState, KVM_CLOCK)
  32. struct KVMClockState {
  33. /*< private >*/
  34. SysBusDevice busdev;
  35. /*< public >*/
  36. uint64_t clock;
  37. bool clock_valid;
  38. /* whether the 'clock' value was obtained in the 'paused' state */
  39. bool runstate_paused;
  40. /* whether machine type supports reliable KVM_GET_CLOCK */
  41. bool mach_use_reliable_get_clock;
  42. /* whether the 'clock' value was obtained in a host with
  43. * reliable KVM_GET_CLOCK */
  44. bool clock_is_reliable;
  45. };
  46. struct pvclock_vcpu_time_info {
  47. uint32_t version;
  48. uint32_t pad0;
  49. uint64_t tsc_timestamp;
  50. uint64_t system_time;
  51. uint32_t tsc_to_system_mul;
  52. int8_t tsc_shift;
  53. uint8_t flags;
  54. uint8_t pad[2];
  55. } __attribute__((__packed__)); /* 32 bytes */
  56. static uint64_t kvmclock_current_nsec(KVMClockState *s)
  57. {
  58. CPUState *cpu = first_cpu;
  59. CPUX86State *env = cpu->env_ptr;
  60. hwaddr kvmclock_struct_pa;
  61. uint64_t migration_tsc = env->tsc;
  62. struct pvclock_vcpu_time_info time;
  63. uint64_t delta;
  64. uint64_t nsec_lo;
  65. uint64_t nsec_hi;
  66. uint64_t nsec;
  67. cpu_synchronize_state(cpu);
  68. if (!(env->system_time_msr & 1ULL)) {
  69. /* KVM clock not active */
  70. return 0;
  71. }
  72. kvmclock_struct_pa = env->system_time_msr & ~1ULL;
  73. cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
  74. assert(time.tsc_timestamp <= migration_tsc);
  75. delta = migration_tsc - time.tsc_timestamp;
  76. if (time.tsc_shift < 0) {
  77. delta >>= -time.tsc_shift;
  78. } else {
  79. delta <<= time.tsc_shift;
  80. }
  81. mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
  82. nsec = (nsec_lo >> 32) | (nsec_hi << 32);
  83. return nsec + time.system_time;
  84. }
  85. static void kvm_update_clock(KVMClockState *s)
  86. {
  87. struct kvm_clock_data data;
  88. int ret;
  89. ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
  90. if (ret < 0) {
  91. fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret));
  92. abort();
  93. }
  94. s->clock = data.clock;
  95. /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns
  96. * essentially CLOCK_MONOTONIC plus a guest-specific adjustment. This
  97. * can drift from the TSC-based value that is computed by the guest,
  98. * so we need to go through kvmclock_current_nsec(). If
  99. * kvm_has_adjust_clock_stable() is true, and the flags contain
  100. * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value
  101. * and kvmclock_current_nsec() is not necessary.
  102. *
  103. * Here, however, we need not check KVM_CLOCK_TSC_STABLE. This is because:
  104. *
  105. * - if the host has disabled the kvmclock master clock, the guest already
  106. * has protection against time going backwards. This "safety net" is only
  107. * absent when kvmclock is stable;
  108. *
  109. * - therefore, we can replace a check like
  110. *
  111. * if last KVM_GET_CLOCK was not reliable then
  112. * read from memory
  113. *
  114. * with
  115. *
  116. * if last KVM_GET_CLOCK was not reliable && masterclock is enabled
  117. * read from memory
  118. *
  119. * However:
  120. *
  121. * - if kvm_has_adjust_clock_stable() returns false, the left side is
  122. * always true (KVM_GET_CLOCK is never reliable), and the right side is
  123. * unknown (because we don't have data.flags). We must assume it's true
  124. * and read from memory.
  125. *
  126. * - if kvm_has_adjust_clock_stable() returns true, the result of the &&
  127. * is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable)
  128. *
  129. * So we can just use this instead:
  130. *
  131. * if !kvm_has_adjust_clock_stable() then
  132. * read from memory
  133. */
  134. s->clock_is_reliable = kvm_has_adjust_clock_stable();
  135. }
  136. static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data)
  137. {
  138. int ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0);
  139. if (ret && ret != -EINVAL) {
  140. fprintf(stderr, "%s: %s\n", __func__, strerror(-ret));
  141. }
  142. }
  143. static void kvmclock_vm_state_change(void *opaque, bool running,
  144. RunState state)
  145. {
  146. KVMClockState *s = opaque;
  147. CPUState *cpu;
  148. int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL);
  149. int ret;
  150. if (running) {
  151. struct kvm_clock_data data = {};
  152. /*
  153. * If the host where s->clock was read did not support reliable
  154. * KVM_GET_CLOCK, read kvmclock value from memory.
  155. */
  156. if (!s->clock_is_reliable) {
  157. uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
  158. /* We can't rely on the saved clock value, just discard it */
  159. if (pvclock_via_mem) {
  160. s->clock = pvclock_via_mem;
  161. }
  162. }
  163. s->clock_valid = false;
  164. data.clock = s->clock;
  165. ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
  166. if (ret < 0) {
  167. fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(-ret));
  168. abort();
  169. }
  170. if (!cap_clock_ctrl) {
  171. return;
  172. }
  173. CPU_FOREACH(cpu) {
  174. run_on_cpu(cpu, do_kvmclock_ctrl, RUN_ON_CPU_NULL);
  175. }
  176. } else {
  177. if (s->clock_valid) {
  178. return;
  179. }
  180. s->runstate_paused = runstate_check(RUN_STATE_PAUSED);
  181. kvm_synchronize_all_tsc();
  182. kvm_update_clock(s);
  183. /*
  184. * If the VM is stopped, declare the clock state valid to
  185. * avoid re-reading it on next vmsave (which would return
  186. * a different value). Will be reset when the VM is continued.
  187. */
  188. s->clock_valid = true;
  189. }
  190. }
  191. static void kvmclock_realize(DeviceState *dev, Error **errp)
  192. {
  193. KVMClockState *s = KVM_CLOCK(dev);
  194. if (!kvm_enabled()) {
  195. error_setg(errp, "kvmclock device requires KVM");
  196. return;
  197. }
  198. kvm_update_clock(s);
  199. qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
  200. }
  201. static bool kvmclock_clock_is_reliable_needed(void *opaque)
  202. {
  203. KVMClockState *s = opaque;
  204. return s->mach_use_reliable_get_clock;
  205. }
  206. static const VMStateDescription kvmclock_reliable_get_clock = {
  207. .name = "kvmclock/clock_is_reliable",
  208. .version_id = 1,
  209. .minimum_version_id = 1,
  210. .needed = kvmclock_clock_is_reliable_needed,
  211. .fields = (VMStateField[]) {
  212. VMSTATE_BOOL(clock_is_reliable, KVMClockState),
  213. VMSTATE_END_OF_LIST()
  214. }
  215. };
  216. /*
  217. * When migrating, assume the source has an unreliable
  218. * KVM_GET_CLOCK unless told otherwise.
  219. */
  220. static int kvmclock_pre_load(void *opaque)
  221. {
  222. KVMClockState *s = opaque;
  223. s->clock_is_reliable = false;
  224. return 0;
  225. }
  226. /*
  227. * When migrating a running guest, read the clock just
  228. * before migration, so that the guest clock counts
  229. * during the events between:
  230. *
  231. * * vm_stop()
  232. * *
  233. * * pre_save()
  234. *
  235. * This reduces kvmclock difference on migration from 5s
  236. * to 0.1s (when max_downtime == 5s), because sending the
  237. * final pages of memory (which happens between vm_stop()
  238. * and pre_save()) takes max_downtime.
  239. */
  240. static int kvmclock_pre_save(void *opaque)
  241. {
  242. KVMClockState *s = opaque;
  243. if (!s->runstate_paused) {
  244. kvm_update_clock(s);
  245. }
  246. return 0;
  247. }
  248. static const VMStateDescription kvmclock_vmsd = {
  249. .name = "kvmclock",
  250. .version_id = 1,
  251. .minimum_version_id = 1,
  252. .pre_load = kvmclock_pre_load,
  253. .pre_save = kvmclock_pre_save,
  254. .fields = (VMStateField[]) {
  255. VMSTATE_UINT64(clock, KVMClockState),
  256. VMSTATE_END_OF_LIST()
  257. },
  258. .subsections = (const VMStateDescription * []) {
  259. &kvmclock_reliable_get_clock,
  260. NULL
  261. }
  262. };
  263. static Property kvmclock_properties[] = {
  264. DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
  265. mach_use_reliable_get_clock, true),
  266. DEFINE_PROP_END_OF_LIST(),
  267. };
  268. static void kvmclock_class_init(ObjectClass *klass, void *data)
  269. {
  270. DeviceClass *dc = DEVICE_CLASS(klass);
  271. dc->realize = kvmclock_realize;
  272. dc->vmsd = &kvmclock_vmsd;
  273. device_class_set_props(dc, kvmclock_properties);
  274. }
  275. static const TypeInfo kvmclock_info = {
  276. .name = TYPE_KVM_CLOCK,
  277. .parent = TYPE_SYS_BUS_DEVICE,
  278. .instance_size = sizeof(KVMClockState),
  279. .class_init = kvmclock_class_init,
  280. };
  281. /* Note: Must be called after VCPU initialization. */
  282. void kvmclock_create(bool create_always)
  283. {
  284. X86CPU *cpu = X86_CPU(first_cpu);
  285. assert(kvm_enabled());
  286. if (!kvm_has_adjust_clock()) {
  287. return;
  288. }
  289. if (create_always ||
  290. cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) |
  291. (1ULL << KVM_FEATURE_CLOCKSOURCE2))) {
  292. sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL);
  293. }
  294. }
  295. static void kvmclock_register_types(void)
  296. {
  297. type_register_static(&kvmclock_info);
  298. }
  299. type_init(kvmclock_register_types)