virtio-balloon.c 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954
  1. /*
  2. * Virtio Balloon Device
  3. *
  4. * Copyright IBM, Corp. 2008
  5. * Copyright (C) 2011 Red Hat, Inc.
  6. * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
  7. *
  8. * Authors:
  9. * Anthony Liguori <aliguori@us.ibm.com>
  10. *
  11. * This work is licensed under the terms of the GNU GPL, version 2. See
  12. * the COPYING file in the top-level directory.
  13. *
  14. */
  15. #include "qemu/osdep.h"
  16. #include "qemu/iov.h"
  17. #include "qemu/module.h"
  18. #include "qemu/timer.h"
  19. #include "hw/virtio/virtio.h"
  20. #include "hw/mem/pc-dimm.h"
  21. #include "hw/qdev-properties.h"
  22. #include "sysemu/balloon.h"
  23. #include "hw/virtio/virtio-balloon.h"
  24. #include "exec/address-spaces.h"
  25. #include "qapi/error.h"
  26. #include "qapi/qapi-events-misc.h"
  27. #include "qapi/visitor.h"
  28. #include "trace.h"
  29. #include "qemu/error-report.h"
  30. #include "migration/misc.h"
  31. #include "hw/virtio/virtio-bus.h"
  32. #include "hw/virtio/virtio-access.h"
  33. #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT)
  34. typedef struct PartiallyBalloonedPage {
  35. ram_addr_t base_gpa;
  36. unsigned long *bitmap;
  37. } PartiallyBalloonedPage;
  38. static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp)
  39. {
  40. if (!pbp->bitmap) {
  41. return;
  42. }
  43. g_free(pbp->bitmap);
  44. pbp->bitmap = NULL;
  45. }
  46. static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp,
  47. ram_addr_t base_gpa,
  48. long subpages)
  49. {
  50. pbp->base_gpa = base_gpa;
  51. pbp->bitmap = bitmap_new(subpages);
  52. }
  53. static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp,
  54. ram_addr_t base_gpa)
  55. {
  56. return pbp->base_gpa == base_gpa;
  57. }
  58. static void balloon_inflate_page(VirtIOBalloon *balloon,
  59. MemoryRegion *mr, hwaddr mr_offset,
  60. PartiallyBalloonedPage *pbp)
  61. {
  62. void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
  63. ram_addr_t rb_offset, rb_aligned_offset, base_gpa;
  64. RAMBlock *rb;
  65. size_t rb_page_size;
  66. int subpages;
  67. /* XXX is there a better way to get to the RAMBlock than via a
  68. * host address? */
  69. rb = qemu_ram_block_from_host(addr, false, &rb_offset);
  70. rb_page_size = qemu_ram_pagesize(rb);
  71. if (rb_page_size == BALLOON_PAGE_SIZE) {
  72. /* Easy case */
  73. ram_block_discard_range(rb, rb_offset, rb_page_size);
  74. /* We ignore errors from ram_block_discard_range(), because it
  75. * has already reported them, and failing to discard a balloon
  76. * page is not fatal */
  77. return;
  78. }
  79. /* Hard case
  80. *
  81. * We've put a piece of a larger host page into the balloon - we
  82. * need to keep track until we have a whole host page to
  83. * discard
  84. */
  85. warn_report_once(
  86. "Balloon used with backing page size > 4kiB, this may not be reliable");
  87. rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size);
  88. subpages = rb_page_size / BALLOON_PAGE_SIZE;
  89. base_gpa = memory_region_get_ram_addr(mr) + mr_offset -
  90. (rb_offset - rb_aligned_offset);
  91. if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) {
  92. /* We've partially ballooned part of a host page, but now
  93. * we're trying to balloon part of a different one. Too hard,
  94. * give up on the old partial page */
  95. virtio_balloon_pbp_free(pbp);
  96. }
  97. if (!pbp->bitmap) {
  98. virtio_balloon_pbp_alloc(pbp, base_gpa, subpages);
  99. }
  100. set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE,
  101. pbp->bitmap);
  102. if (bitmap_full(pbp->bitmap, subpages)) {
  103. /* We've accumulated a full host page, we can actually discard
  104. * it now */
  105. ram_block_discard_range(rb, rb_aligned_offset, rb_page_size);
  106. /* We ignore errors from ram_block_discard_range(), because it
  107. * has already reported them, and failing to discard a balloon
  108. * page is not fatal */
  109. virtio_balloon_pbp_free(pbp);
  110. }
  111. }
  112. static void balloon_deflate_page(VirtIOBalloon *balloon,
  113. MemoryRegion *mr, hwaddr mr_offset)
  114. {
  115. void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
  116. ram_addr_t rb_offset;
  117. RAMBlock *rb;
  118. size_t rb_page_size;
  119. void *host_addr;
  120. int ret;
  121. /* XXX is there a better way to get to the RAMBlock than via a
  122. * host address? */
  123. rb = qemu_ram_block_from_host(addr, false, &rb_offset);
  124. rb_page_size = qemu_ram_pagesize(rb);
  125. host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));
  126. /* When a page is deflated, we hint the whole host page it lives
  127. * on, since we can't do anything smaller */
  128. ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
  129. if (ret != 0) {
  130. warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
  131. strerror(errno));
  132. /* Otherwise ignore, failing to page hint shouldn't be fatal */
  133. }
  134. }
  135. static const char *balloon_stat_names[] = {
  136. [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
  137. [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
  138. [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
  139. [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
  140. [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
  141. [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
  142. [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
  143. [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
  144. [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
  145. [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
  146. [VIRTIO_BALLOON_S_NR] = NULL
  147. };
  148. /*
  149. * reset_stats - Mark all items in the stats array as unset
  150. *
  151. * This function needs to be called at device initialization and before
  152. * updating to a set of newly-generated stats. This will ensure that no
  153. * stale values stick around in case the guest reports a subset of the supported
  154. * statistics.
  155. */
  156. static inline void reset_stats(VirtIOBalloon *dev)
  157. {
  158. int i;
  159. for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
  160. }
  161. static bool balloon_stats_supported(const VirtIOBalloon *s)
  162. {
  163. VirtIODevice *vdev = VIRTIO_DEVICE(s);
  164. return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
  165. }
  166. static bool balloon_stats_enabled(const VirtIOBalloon *s)
  167. {
  168. return s->stats_poll_interval > 0;
  169. }
  170. static void balloon_stats_destroy_timer(VirtIOBalloon *s)
  171. {
  172. if (balloon_stats_enabled(s)) {
  173. timer_del(s->stats_timer);
  174. timer_free(s->stats_timer);
  175. s->stats_timer = NULL;
  176. s->stats_poll_interval = 0;
  177. }
  178. }
  179. static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
  180. {
  181. timer_mod(s->stats_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000);
  182. }
  183. static void balloon_stats_poll_cb(void *opaque)
  184. {
  185. VirtIOBalloon *s = opaque;
  186. VirtIODevice *vdev = VIRTIO_DEVICE(s);
  187. if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
  188. /* re-schedule */
  189. balloon_stats_change_timer(s, s->stats_poll_interval);
  190. return;
  191. }
  192. virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset);
  193. virtio_notify(vdev, s->svq);
  194. g_free(s->stats_vq_elem);
  195. s->stats_vq_elem = NULL;
  196. }
  197. static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
  198. void *opaque, Error **errp)
  199. {
  200. Error *err = NULL;
  201. VirtIOBalloon *s = opaque;
  202. int i;
  203. visit_start_struct(v, name, NULL, 0, &err);
  204. if (err) {
  205. goto out;
  206. }
  207. visit_type_int(v, "last-update", &s->stats_last_update, &err);
  208. if (err) {
  209. goto out_end;
  210. }
  211. visit_start_struct(v, "stats", NULL, 0, &err);
  212. if (err) {
  213. goto out_end;
  214. }
  215. for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
  216. visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err);
  217. if (err) {
  218. goto out_nested;
  219. }
  220. }
  221. visit_check_struct(v, &err);
  222. out_nested:
  223. visit_end_struct(v, NULL);
  224. if (!err) {
  225. visit_check_struct(v, &err);
  226. }
  227. out_end:
  228. visit_end_struct(v, NULL);
  229. out:
  230. error_propagate(errp, err);
  231. }
  232. static void balloon_stats_get_poll_interval(Object *obj, Visitor *v,
  233. const char *name, void *opaque,
  234. Error **errp)
  235. {
  236. VirtIOBalloon *s = opaque;
  237. visit_type_int(v, name, &s->stats_poll_interval, errp);
  238. }
  239. static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
  240. const char *name, void *opaque,
  241. Error **errp)
  242. {
  243. VirtIOBalloon *s = opaque;
  244. Error *local_err = NULL;
  245. int64_t value;
  246. visit_type_int(v, name, &value, &local_err);
  247. if (local_err) {
  248. error_propagate(errp, local_err);
  249. return;
  250. }
  251. if (value < 0) {
  252. error_setg(errp, "timer value must be greater than zero");
  253. return;
  254. }
  255. if (value > UINT32_MAX) {
  256. error_setg(errp, "timer value is too big");
  257. return;
  258. }
  259. if (value == s->stats_poll_interval) {
  260. return;
  261. }
  262. if (value == 0) {
  263. /* timer=0 disables the timer */
  264. balloon_stats_destroy_timer(s);
  265. return;
  266. }
  267. if (balloon_stats_enabled(s)) {
  268. /* timer interval change */
  269. s->stats_poll_interval = value;
  270. balloon_stats_change_timer(s, value);
  271. return;
  272. }
  273. /* create a new timer */
  274. g_assert(s->stats_timer == NULL);
  275. s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
  276. s->stats_poll_interval = value;
  277. balloon_stats_change_timer(s, 0);
  278. }
  279. static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
  280. {
  281. VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
  282. VirtQueueElement *elem;
  283. MemoryRegionSection section;
  284. for (;;) {
  285. PartiallyBalloonedPage pbp = {};
  286. size_t offset = 0;
  287. uint32_t pfn;
  288. elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
  289. if (!elem) {
  290. break;
  291. }
  292. while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) {
  293. unsigned int p = virtio_ldl_p(vdev, &pfn);
  294. hwaddr pa;
  295. pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT;
  296. offset += 4;
  297. section = memory_region_find(get_system_memory(), pa,
  298. BALLOON_PAGE_SIZE);
  299. if (!section.mr) {
  300. trace_virtio_balloon_bad_addr(pa);
  301. continue;
  302. }
  303. if (!memory_region_is_ram(section.mr) ||
  304. memory_region_is_rom(section.mr) ||
  305. memory_region_is_romd(section.mr)) {
  306. trace_virtio_balloon_bad_addr(pa);
  307. memory_region_unref(section.mr);
  308. continue;
  309. }
  310. trace_virtio_balloon_handle_output(memory_region_name(section.mr),
  311. pa);
  312. if (!qemu_balloon_is_inhibited()) {
  313. if (vq == s->ivq) {
  314. balloon_inflate_page(s, section.mr,
  315. section.offset_within_region, &pbp);
  316. } else if (vq == s->dvq) {
  317. balloon_deflate_page(s, section.mr, section.offset_within_region);
  318. } else {
  319. g_assert_not_reached();
  320. }
  321. }
  322. memory_region_unref(section.mr);
  323. }
  324. virtqueue_push(vq, elem, offset);
  325. virtio_notify(vdev, vq);
  326. g_free(elem);
  327. virtio_balloon_pbp_free(&pbp);
  328. }
  329. }
  330. static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
  331. {
  332. VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
  333. VirtQueueElement *elem;
  334. VirtIOBalloonStat stat;
  335. size_t offset = 0;
  336. qemu_timeval tv;
  337. elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
  338. if (!elem) {
  339. goto out;
  340. }
  341. if (s->stats_vq_elem != NULL) {
  342. /* This should never happen if the driver follows the spec. */
  343. virtqueue_push(vq, s->stats_vq_elem, 0);
  344. virtio_notify(vdev, vq);
  345. g_free(s->stats_vq_elem);
  346. }
  347. s->stats_vq_elem = elem;
  348. /* Initialize the stats to get rid of any stale values. This is only
  349. * needed to handle the case where a guest supports fewer stats than it
  350. * used to (ie. it has booted into an old kernel).
  351. */
  352. reset_stats(s);
  353. while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
  354. == sizeof(stat)) {
  355. uint16_t tag = virtio_tswap16(vdev, stat.tag);
  356. uint64_t val = virtio_tswap64(vdev, stat.val);
  357. offset += sizeof(stat);
  358. if (tag < VIRTIO_BALLOON_S_NR)
  359. s->stats[tag] = val;
  360. }
  361. s->stats_vq_offset = offset;
  362. if (qemu_gettimeofday(&tv) < 0) {
  363. warn_report("%s: failed to get time of day", __func__);
  364. goto out;
  365. }
  366. s->stats_last_update = tv.tv_sec;
  367. out:
  368. if (balloon_stats_enabled(s)) {
  369. balloon_stats_change_timer(s, s->stats_poll_interval);
  370. }
  371. }
  372. static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
  373. VirtQueue *vq)
  374. {
  375. VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
  376. qemu_bh_schedule(s->free_page_bh);
  377. }
  378. static bool get_free_page_hints(VirtIOBalloon *dev)
  379. {
  380. VirtQueueElement *elem;
  381. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  382. VirtQueue *vq = dev->free_page_vq;
  383. bool ret = true;
  384. while (dev->block_iothread) {
  385. qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
  386. }
  387. elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
  388. if (!elem) {
  389. return false;
  390. }
  391. if (elem->out_num) {
  392. uint32_t id;
  393. size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
  394. &id, sizeof(id));
  395. virtio_tswap32s(vdev, &id);
  396. if (unlikely(size != sizeof(id))) {
  397. virtio_error(vdev, "received an incorrect cmd id");
  398. ret = false;
  399. goto out;
  400. }
  401. if (id == dev->free_page_report_cmd_id) {
  402. dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
  403. } else {
  404. /*
  405. * Stop the optimization only when it has started. This
  406. * avoids a stale stop sign for the previous command.
  407. */
  408. if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
  409. dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
  410. }
  411. }
  412. }
  413. if (elem->in_num) {
  414. if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
  415. qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
  416. elem->in_sg[0].iov_len);
  417. }
  418. }
  419. out:
  420. virtqueue_push(vq, elem, 1);
  421. g_free(elem);
  422. return ret;
  423. }
  424. static void virtio_ballloon_get_free_page_hints(void *opaque)
  425. {
  426. VirtIOBalloon *dev = opaque;
  427. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  428. VirtQueue *vq = dev->free_page_vq;
  429. bool continue_to_get_hints;
  430. do {
  431. qemu_mutex_lock(&dev->free_page_lock);
  432. virtio_queue_set_notification(vq, 0);
  433. continue_to_get_hints = get_free_page_hints(dev);
  434. qemu_mutex_unlock(&dev->free_page_lock);
  435. virtio_notify(vdev, vq);
  436. /*
  437. * Start to poll the vq once the reporting started. Otherwise, continue
  438. * only when there are entries on the vq, which need to be given back.
  439. */
  440. } while (continue_to_get_hints ||
  441. dev->free_page_report_status == FREE_PAGE_REPORT_S_START);
  442. virtio_queue_set_notification(vq, 1);
  443. }
  444. static bool virtio_balloon_free_page_support(void *opaque)
  445. {
  446. VirtIOBalloon *s = opaque;
  447. VirtIODevice *vdev = VIRTIO_DEVICE(s);
  448. return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
  449. }
  450. static void virtio_balloon_free_page_start(VirtIOBalloon *s)
  451. {
  452. VirtIODevice *vdev = VIRTIO_DEVICE(s);
  453. /* For the stop and copy phase, we don't need to start the optimization */
  454. if (!vdev->vm_running) {
  455. return;
  456. }
  457. if (s->free_page_report_cmd_id == UINT_MAX) {
  458. s->free_page_report_cmd_id =
  459. VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
  460. } else {
  461. s->free_page_report_cmd_id++;
  462. }
  463. s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
  464. virtio_notify_config(vdev);
  465. }
  466. static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
  467. {
  468. VirtIODevice *vdev = VIRTIO_DEVICE(s);
  469. if (s->free_page_report_status != FREE_PAGE_REPORT_S_STOP) {
  470. /*
  471. * The lock also guarantees us that the
  472. * virtio_ballloon_get_free_page_hints exits after the
  473. * free_page_report_status is set to S_STOP.
  474. */
  475. qemu_mutex_lock(&s->free_page_lock);
  476. /*
  477. * The guest hasn't done the reporting, so host sends a notification
  478. * to the guest to actively stop the reporting.
  479. */
  480. s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
  481. qemu_mutex_unlock(&s->free_page_lock);
  482. virtio_notify_config(vdev);
  483. }
  484. }
  485. static void virtio_balloon_free_page_done(VirtIOBalloon *s)
  486. {
  487. VirtIODevice *vdev = VIRTIO_DEVICE(s);
  488. s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
  489. virtio_notify_config(vdev);
  490. }
  491. static int
  492. virtio_balloon_free_page_report_notify(NotifierWithReturn *n, void *data)
  493. {
  494. VirtIOBalloon *dev = container_of(n, VirtIOBalloon,
  495. free_page_report_notify);
  496. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  497. PrecopyNotifyData *pnd = data;
  498. if (!virtio_balloon_free_page_support(dev)) {
  499. /*
  500. * This is an optimization provided to migration, so just return 0 to
  501. * have the normal migration process not affected when this feature is
  502. * not supported.
  503. */
  504. return 0;
  505. }
  506. switch (pnd->reason) {
  507. case PRECOPY_NOTIFY_SETUP:
  508. precopy_enable_free_page_optimization();
  509. break;
  510. case PRECOPY_NOTIFY_COMPLETE:
  511. case PRECOPY_NOTIFY_CLEANUP:
  512. case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
  513. virtio_balloon_free_page_stop(dev);
  514. break;
  515. case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
  516. if (vdev->vm_running) {
  517. virtio_balloon_free_page_start(dev);
  518. } else {
  519. virtio_balloon_free_page_done(dev);
  520. }
  521. break;
  522. default:
  523. virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
  524. }
  525. return 0;
  526. }
  527. static size_t virtio_balloon_config_size(VirtIOBalloon *s)
  528. {
  529. uint64_t features = s->host_features;
  530. if (s->qemu_4_0_config_size) {
  531. return sizeof(struct virtio_balloon_config);
  532. }
  533. if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) {
  534. return sizeof(struct virtio_balloon_config);
  535. }
  536. if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
  537. return offsetof(struct virtio_balloon_config, poison_val);
  538. }
  539. return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
  540. }
  541. static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
  542. {
  543. VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
  544. struct virtio_balloon_config config = {};
  545. config.num_pages = cpu_to_le32(dev->num_pages);
  546. config.actual = cpu_to_le32(dev->actual);
  547. if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
  548. config.free_page_report_cmd_id =
  549. cpu_to_le32(dev->free_page_report_cmd_id);
  550. } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
  551. config.free_page_report_cmd_id =
  552. cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
  553. } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
  554. config.free_page_report_cmd_id =
  555. cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
  556. }
  557. trace_virtio_balloon_get_config(config.num_pages, config.actual);
  558. memcpy(config_data, &config, virtio_balloon_config_size(dev));
  559. }
  560. static int build_dimm_list(Object *obj, void *opaque)
  561. {
  562. GSList **list = opaque;
  563. if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
  564. DeviceState *dev = DEVICE(obj);
  565. if (dev->realized) { /* only realized DIMMs matter */
  566. *list = g_slist_prepend(*list, dev);
  567. }
  568. }
  569. object_child_foreach(obj, build_dimm_list, opaque);
  570. return 0;
  571. }
  572. static ram_addr_t get_current_ram_size(void)
  573. {
  574. GSList *list = NULL, *item;
  575. ram_addr_t size = ram_size;
  576. build_dimm_list(qdev_get_machine(), &list);
  577. for (item = list; item; item = g_slist_next(item)) {
  578. Object *obj = OBJECT(item->data);
  579. if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
  580. size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
  581. &error_abort);
  582. }
  583. }
  584. g_slist_free(list);
  585. return size;
  586. }
  587. static void virtio_balloon_set_config(VirtIODevice *vdev,
  588. const uint8_t *config_data)
  589. {
  590. VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
  591. struct virtio_balloon_config config;
  592. uint32_t oldactual = dev->actual;
  593. ram_addr_t vm_ram_size = get_current_ram_size();
  594. memcpy(&config, config_data, virtio_balloon_config_size(dev));
  595. dev->actual = le32_to_cpu(config.actual);
  596. if (dev->actual != oldactual) {
  597. qapi_event_send_balloon_change(vm_ram_size -
  598. ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
  599. }
  600. trace_virtio_balloon_set_config(dev->actual, oldactual);
  601. }
  602. static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
  603. Error **errp)
  604. {
  605. VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
  606. f |= dev->host_features;
  607. virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
  608. return f;
  609. }
  610. static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
  611. {
  612. VirtIOBalloon *dev = opaque;
  613. info->actual = get_current_ram_size() - ((uint64_t) dev->actual <<
  614. VIRTIO_BALLOON_PFN_SHIFT);
  615. }
  616. static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
  617. {
  618. VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
  619. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  620. ram_addr_t vm_ram_size = get_current_ram_size();
  621. if (target > vm_ram_size) {
  622. target = vm_ram_size;
  623. }
  624. if (target) {
  625. dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
  626. virtio_notify_config(vdev);
  627. }
  628. trace_virtio_balloon_to_target(target, dev->num_pages);
  629. }
  630. static int virtio_balloon_post_load_device(void *opaque, int version_id)
  631. {
  632. VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
  633. if (balloon_stats_enabled(s)) {
  634. balloon_stats_change_timer(s, s->stats_poll_interval);
  635. }
  636. return 0;
  637. }
  638. static const VMStateDescription vmstate_virtio_balloon_free_page_report = {
  639. .name = "virtio-balloon-device/free-page-report",
  640. .version_id = 1,
  641. .minimum_version_id = 1,
  642. .needed = virtio_balloon_free_page_support,
  643. .fields = (VMStateField[]) {
  644. VMSTATE_UINT32(free_page_report_cmd_id, VirtIOBalloon),
  645. VMSTATE_UINT32(free_page_report_status, VirtIOBalloon),
  646. VMSTATE_END_OF_LIST()
  647. }
  648. };
  649. static const VMStateDescription vmstate_virtio_balloon_device = {
  650. .name = "virtio-balloon-device",
  651. .version_id = 1,
  652. .minimum_version_id = 1,
  653. .post_load = virtio_balloon_post_load_device,
  654. .fields = (VMStateField[]) {
  655. VMSTATE_UINT32(num_pages, VirtIOBalloon),
  656. VMSTATE_UINT32(actual, VirtIOBalloon),
  657. VMSTATE_END_OF_LIST()
  658. },
  659. .subsections = (const VMStateDescription * []) {
  660. &vmstate_virtio_balloon_free_page_report,
  661. NULL
  662. }
  663. };
  664. static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
  665. {
  666. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  667. VirtIOBalloon *s = VIRTIO_BALLOON(dev);
  668. int ret;
  669. virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
  670. virtio_balloon_config_size(s));
  671. ret = qemu_add_balloon_handler(virtio_balloon_to_target,
  672. virtio_balloon_stat, s);
  673. if (ret < 0) {
  674. error_setg(errp, "Only one balloon device is supported");
  675. virtio_cleanup(vdev);
  676. return;
  677. }
  678. s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
  679. s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
  680. s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
  681. if (virtio_has_feature(s->host_features,
  682. VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
  683. s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
  684. virtio_balloon_handle_free_page_vq);
  685. s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
  686. s->free_page_report_cmd_id =
  687. VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
  688. s->free_page_report_notify.notify =
  689. virtio_balloon_free_page_report_notify;
  690. precopy_add_notifier(&s->free_page_report_notify);
  691. if (s->iothread) {
  692. object_ref(OBJECT(s->iothread));
  693. s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
  694. virtio_ballloon_get_free_page_hints, s);
  695. qemu_mutex_init(&s->free_page_lock);
  696. qemu_cond_init(&s->free_page_cond);
  697. s->block_iothread = false;
  698. } else {
  699. /* Simply disable this feature if the iothread wasn't created. */
  700. s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
  701. virtio_error(vdev, "iothread is missing");
  702. }
  703. }
  704. reset_stats(s);
  705. }
  706. static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
  707. {
  708. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  709. VirtIOBalloon *s = VIRTIO_BALLOON(dev);
  710. if (virtio_balloon_free_page_support(s)) {
  711. qemu_bh_delete(s->free_page_bh);
  712. virtio_balloon_free_page_stop(s);
  713. precopy_remove_notifier(&s->free_page_report_notify);
  714. }
  715. balloon_stats_destroy_timer(s);
  716. qemu_remove_balloon_handler(s);
  717. virtio_cleanup(vdev);
  718. }
  719. static void virtio_balloon_device_reset(VirtIODevice *vdev)
  720. {
  721. VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
  722. if (virtio_balloon_free_page_support(s)) {
  723. virtio_balloon_free_page_stop(s);
  724. }
  725. if (s->stats_vq_elem != NULL) {
  726. virtqueue_unpop(s->svq, s->stats_vq_elem, 0);
  727. g_free(s->stats_vq_elem);
  728. s->stats_vq_elem = NULL;
  729. }
  730. }
  731. static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
  732. {
  733. VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
  734. if (!s->stats_vq_elem && vdev->vm_running &&
  735. (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) {
  736. /* poll stats queue for the element we have discarded when the VM
  737. * was stopped */
  738. virtio_balloon_receive_stats(vdev, s->svq);
  739. }
  740. if (virtio_balloon_free_page_support(s)) {
  741. /*
  742. * The VM is woken up and the iothread was blocked, so signal it to
  743. * continue.
  744. */
  745. if (vdev->vm_running && s->block_iothread) {
  746. qemu_mutex_lock(&s->free_page_lock);
  747. s->block_iothread = false;
  748. qemu_cond_signal(&s->free_page_cond);
  749. qemu_mutex_unlock(&s->free_page_lock);
  750. }
  751. /* The VM is stopped, block the iothread. */
  752. if (!vdev->vm_running) {
  753. qemu_mutex_lock(&s->free_page_lock);
  754. s->block_iothread = true;
  755. qemu_mutex_unlock(&s->free_page_lock);
  756. }
  757. }
  758. }
  759. static void virtio_balloon_instance_init(Object *obj)
  760. {
  761. VirtIOBalloon *s = VIRTIO_BALLOON(obj);
  762. object_property_add(obj, "guest-stats", "guest statistics",
  763. balloon_stats_get_all, NULL, NULL, s, NULL);
  764. object_property_add(obj, "guest-stats-polling-interval", "int",
  765. balloon_stats_get_poll_interval,
  766. balloon_stats_set_poll_interval,
  767. NULL, s, NULL);
  768. }
  769. static const VMStateDescription vmstate_virtio_balloon = {
  770. .name = "virtio-balloon",
  771. .minimum_version_id = 1,
  772. .version_id = 1,
  773. .fields = (VMStateField[]) {
  774. VMSTATE_VIRTIO_DEVICE,
  775. VMSTATE_END_OF_LIST()
  776. },
  777. };
  778. static Property virtio_balloon_properties[] = {
  779. DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
  780. VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
  781. DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
  782. VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
  783. /* QEMU 4.0 accidentally changed the config size even when free-page-hint
  784. * is disabled, resulting in QEMU 3.1 migration incompatibility. This
  785. * property retains this quirk for QEMU 4.1 machine types.
  786. */
  787. DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
  788. qemu_4_0_config_size, false),
  789. DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
  790. IOThread *),
  791. DEFINE_PROP_END_OF_LIST(),
  792. };
  793. static void virtio_balloon_class_init(ObjectClass *klass, void *data)
  794. {
  795. DeviceClass *dc = DEVICE_CLASS(klass);
  796. VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
  797. dc->props = virtio_balloon_properties;
  798. dc->vmsd = &vmstate_virtio_balloon;
  799. set_bit(DEVICE_CATEGORY_MISC, dc->categories);
  800. vdc->realize = virtio_balloon_device_realize;
  801. vdc->unrealize = virtio_balloon_device_unrealize;
  802. vdc->reset = virtio_balloon_device_reset;
  803. vdc->get_config = virtio_balloon_get_config;
  804. vdc->set_config = virtio_balloon_set_config;
  805. vdc->get_features = virtio_balloon_get_features;
  806. vdc->set_status = virtio_balloon_set_status;
  807. vdc->vmsd = &vmstate_virtio_balloon_device;
  808. }
  809. static const TypeInfo virtio_balloon_info = {
  810. .name = TYPE_VIRTIO_BALLOON,
  811. .parent = TYPE_VIRTIO_DEVICE,
  812. .instance_size = sizeof(VirtIOBalloon),
  813. .instance_init = virtio_balloon_instance_init,
  814. .class_init = virtio_balloon_class_init,
  815. };
  816. static void virtio_register_types(void)
  817. {
  818. type_register_static(&virtio_balloon_info);
  819. }
  820. type_init(virtio_register_types)