hv-balloon.c

/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "hv-balloon-internal.h"

#include "exec/address-spaces.h"
#include "exec/cpu-common.h"
#include "exec/ramblock.h"
#include "hw/boards.h"
#include "hw/hyperv/dynmem-proto.h"
#include "hw/hyperv/hv-balloon.h"
#include "hw/hyperv/vmbus.h"
#include "hw/mem/memory-device.h"
#include "hw/mem/pc-dimm.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "monitor/qdev.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-events-machine.h"
#include "qapi/qapi-types-machine.h"
#include "qobject/qdict.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/units.h"
#include "qemu/timer.h"
#include "system/balloon.h"
#include "system/hostmem.h"
#include "system/reset.h"
#include "hv-balloon-our_range_memslots.h"
#include "hv-balloon-page_range_tree.h"
#include "trace.h"

#define HV_BALLOON_ADDR_PROP "addr"
#define HV_BALLOON_MEMDEV_PROP "memdev"
#define HV_BALLOON_GUID "525074DC-8985-46e2-8057-A307DC18A502"

/*
 * Some Windows versions (at least Server 2019) will crash with various
 * error codes when receiving DM protocol requests (at least
 * DM_MEM_HOT_ADD_REQUEST) immediately after boot.
 *
 * It looks like Hyper-V from Server 2016 uses a 50-second after-boot
 * delay, probably to work around this issue, so we'll use this value, too.
 */
#define HV_BALLOON_POST_INIT_WAIT (50 * 1000)

#define HV_BALLOON_HA_CHUNK_SIZE (2 * GiB)
#define HV_BALLOON_HA_CHUNK_PAGES (HV_BALLOON_HA_CHUNK_SIZE / HV_BALLOON_PAGE_SIZE)

#define HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN (128 * MiB)

#define HV_BALLOON_HR_CHUNK_PAGES 585728
/*
 *                                ^ that's the maximum number of pages
 * that Windows returns in one hot remove response
 *
 * If the number requested is too high Windows will no longer honor
 * these requests
 */

struct HvBalloonClass {
    VMBusDeviceClass parent_class;
} HvBalloonClass;

typedef enum State {
    /* not a real state */
    S_NO_CHANGE = 0,

    S_WAIT_RESET,
    S_POST_RESET_CLOSED,

    /* init flow */
    S_VERSION,
    S_CAPS,
    S_POST_INIT_WAIT,
    S_IDLE,

    /* balloon op flow */
    S_BALLOON_POSTING,
    S_BALLOON_RB_WAIT,
    S_BALLOON_REPLY_WAIT,

    /* unballoon + hot add ops flow */
    S_UNBALLOON_POSTING,
    S_UNBALLOON_RB_WAIT,
    S_UNBALLOON_REPLY_WAIT,
    S_HOT_ADD_SETUP,
    S_HOT_ADD_RB_WAIT,
    S_HOT_ADD_POSTING,
    S_HOT_ADD_REPLY_WAIT,
} State;
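
/*
 * Driver state machine overview (see the State enum above):
 *
 * After a VMBus reset the device sits in S_WAIT_RESET / S_POST_RESET_CLOSED
 * until the guest opens the DM channel again.  The init flow then negotiates
 * the protocol version (S_VERSION) and capabilities (S_CAPS), waits out the
 * post-init delay (S_POST_INIT_WAIT) and settles in S_IDLE.
 *
 * From S_IDLE a target change starts either the balloon flow
 * (S_BALLOON_RB_WAIT -> S_BALLOON_POSTING -> S_BALLOON_REPLY_WAIT) or the
 * unballoon / hot-add flow (S_UNBALLOON_* and S_HOT_ADD_*), each of which
 * returns to S_IDLE once the corresponding guest response arrives.
 */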
typedef struct StateDesc {
    State state;
    const char *desc;
} StateDesc;

typedef struct HvBalloon {
    VMBusDevice parent;
    State state;

    union dm_version version;
    union dm_caps caps;

    QEMUTimer post_init_timer;

    unsigned int trans_id;

    struct {
        bool enabled;
        bool received;
        uint64_t committed;
        uint64_t available;
    } status_report;

    /* Guest target size */
    uint64_t target;
    bool target_changed;

    /* Current (un)balloon / hot-add operation parameters */
    union {
        uint64_t balloon_diff;

        struct {
            uint64_t unballoon_diff;
            uint64_t hot_add_diff;
        };

        struct {
            PageRange hot_add_range;
            uint64_t ha_current_count;
        };
    };

    OurRangeMemslots *our_range;

    /* Count of memslots covering our memory */
    unsigned int memslot_count;

    /* Nominal size of each memslot (the last one might be smaller) */
    uint64_t memslot_size;

    /* Non-ours removed memory */
    PageRangeTree removed_guest, removed_both;

    /* Grand totals of removed memory (both ours and non-ours) */
    uint64_t removed_guest_ctr, removed_both_ctr;

    /* MEMORY_DEVICE props */
    uint64_t addr;
    HostMemoryBackend *hostmem;
    MemoryRegion *mr;
} HvBalloon;

OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \
                                   { TYPE_MEMORY_DEVICE }, { })

#define HV_BALLOON_SET_STATE(hvb, news)             \
    do {                                            \
        assert(news != S_NO_CHANGE);                \
        hv_balloon_state_set(hvb, news, # news);    \
    } while (0)

#define HV_BALLOON_STATE_DESC_SET(stdesc, news) \
    _hv_balloon_state_desc_set(stdesc, news, # news)

#define HV_BALLOON_STATE_DESC_INIT \
    {                              \
        .state = S_NO_CHANGE,      \
    }

typedef struct HvBalloonReq {
    VMBusChanReq vmreq;
} HvBalloonReq;

/* total our memory includes parts currently removed from the guest */
static uint64_t hv_balloon_total_our_ram(HvBalloon *balloon)
{
    if (!balloon->our_range) {
        return 0;
    }

    return balloon->our_range->range.added;
}

/* TODO: unify the code below with virtio-balloon and cache the value */
static int build_dimm_list(Object *obj, void *opaque)
{
    GSList **list = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        DeviceState *dev = DEVICE(obj);
        if (dev->realized) { /* only realized DIMMs matter */
            *list = g_slist_prepend(*list, dev);
        }
    }

    object_child_foreach(obj, build_dimm_list, opaque);
    return 0;
}

static ram_addr_t get_current_ram_size(void)
{
    GSList *list = NULL, *item;
    ram_addr_t size = current_machine->ram_size;

    build_dimm_list(qdev_get_machine(), &list);
    for (item = list; item; item = g_slist_next(item)) {
        Object *obj = OBJECT(item->data);
        if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM))
            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
                                            &error_abort);
    }
    g_slist_free(list);

    return size;
}

/* total RAM includes memory currently removed from the guest */
static uint64_t hv_balloon_total_ram(HvBalloon *balloon)
{
    ram_addr_t ram_size = get_current_ram_size();
    uint64_t ram_size_pages = ram_size >> HV_BALLOON_PFN_SHIFT;
    uint64_t our_ram_size_pages = hv_balloon_total_our_ram(balloon);

    assert(ram_size_pages > 0);

    return SUM_SATURATE_U64(ram_size_pages, our_ram_size_pages);
}

/*
 * calculating the total RAM size is a slow operation,
 * avoid it as much as possible
 */
static uint64_t hv_balloon_total_removed_rs(HvBalloon *balloon,
                                            uint64_t ram_size_pages)
{
    uint64_t total_removed;

    total_removed = SUM_SATURATE_U64(balloon->removed_guest_ctr,
                                     balloon->removed_both_ctr);

    /* possible if guest returns pages outside actual RAM */
    if (total_removed > ram_size_pages) {
        total_removed = ram_size_pages;
    }

    return total_removed;
}

/* Returns whether the state has actually changed */
static bool hv_balloon_state_set(HvBalloon *balloon,
                                 State newst, const char *newststr)
{
    if (newst == S_NO_CHANGE || balloon->state == newst) {
        return false;
    }

    balloon->state = newst;
    trace_hv_balloon_state_change(newststr);
    return true;
}

static void _hv_balloon_state_desc_set(StateDesc *stdesc,
                                       State newst, const char *newststr)
{
    /* state setting is only permitted on a freshly init desc */
    assert(stdesc->state == S_NO_CHANGE);

    assert(newst != S_NO_CHANGE);
    stdesc->state = newst;
    stdesc->desc = newststr;
}

static VMBusChannel *hv_balloon_get_channel_maybe(HvBalloon *balloon)
{
    return vmbus_device_channel(&balloon->parent, 0);
}

static VMBusChannel *hv_balloon_get_channel(HvBalloon *balloon)
{
    VMBusChannel *chan;

    chan = hv_balloon_get_channel_maybe(balloon);
    assert(chan != NULL);
    return chan;
}

static ssize_t hv_balloon_send_packet(VMBusChannel *chan,
                                      struct dm_message *msg)
{
    int ret;

    ret = vmbus_channel_reserve(chan, 0, msg->hdr.size);
    if (ret < 0) {
        return ret;
    }

    return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                              NULL, 0, msg, msg->hdr.size, false,
                              msg->hdr.trans_id);
}

static bool hv_balloon_unballoon_get_source(HvBalloon *balloon,
                                            PageRangeTree *dtree,
                                            uint64_t **dctr,
                                            bool *is_our_range)
{
    OurRange *our_range = OUR_RANGE(balloon->our_range);

    /* Try the boot memory first */
    if (g_tree_nnodes(balloon->removed_guest.t) > 0) {
        *dtree = balloon->removed_guest;
        *dctr = &balloon->removed_guest_ctr;
        *is_our_range = false;
    } else if (g_tree_nnodes(balloon->removed_both.t) > 0) {
        *dtree = balloon->removed_both;
        *dctr = &balloon->removed_both_ctr;
        *is_our_range = false;
    } else if (!our_range) {
        return false;
    } else if (!our_range_is_removed_tree_empty(our_range, false)) {
        *dtree = our_range_get_removed_tree(our_range, false);
        *dctr = &balloon->removed_guest_ctr;
        *is_our_range = true;
    } else if (!our_range_is_removed_tree_empty(our_range, true)) {
        *dtree = our_range_get_removed_tree(our_range, true);
        *dctr = &balloon->removed_both_ctr;
        *is_our_range = true;
    } else {
        return false;
    }

    return true;
}
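
/*
 * The *_rb_wait state handlers below only try to reserve space for the
 * outgoing message in the VMBus ring buffer; once that succeeds they advance
 * the state to the matching *_posting handler, which actually builds and
 * sends the request.  This keeps the (possibly failing) reservation separate
 * from the message construction.
 */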
static void hv_balloon_unballoon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_unballoon_request *ur;
    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);

    assert(balloon->state == S_UNBALLOON_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, ur_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_POSTING);
}

static void hv_balloon_unballoon_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    PageRangeTree dtree;
    uint64_t *dctr;
    bool our_range;
    g_autofree struct dm_unballoon_request *ur = NULL;
    size_t ur_size = sizeof(*ur) + sizeof(ur->range_array[0]);
    PageRange range;
    bool bret;
    ssize_t ret;

    assert(balloon->state == S_UNBALLOON_POSTING);
    assert(balloon->unballoon_diff > 0);

    if (!hv_balloon_unballoon_get_source(balloon, &dtree, &dctr, &our_range)) {
        error_report("trying to unballoon but nothing seems to be ballooned");
        /*
         * there is little we can do as we might have already
         * sent the guest a partial request we can't cancel
         */
        return;
    }

    assert(balloon->our_range || !our_range);
    assert(dtree.t);
    assert(dctr);

    ur = g_malloc0(ur_size);
    ur->hdr.type = DM_UNBALLOON_REQUEST;
    ur->hdr.size = ur_size;
    ur->hdr.trans_id = balloon->trans_id;

    bret = hvb_page_range_tree_pop(dtree, &range, MIN(balloon->unballoon_diff,
                                                      HV_BALLOON_HA_CHUNK_PAGES));
    assert(bret);
    /* TODO: madvise? */

    *dctr -= range.count;
    balloon->unballoon_diff -= range.count;

    ur->range_count = 1;
    ur->range_array[0].finfo.start_page = range.start;
    ur->range_array[0].finfo.page_cnt = range.count;
    ur->more_pages = balloon->unballoon_diff > 0;

    trace_hv_balloon_outgoing_unballoon(ur->hdr.trans_id,
                                        range.count, range.start,
                                        balloon->unballoon_diff);

    if (ur->more_pages) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_REPLY_WAIT);
    }

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, ur, ur_size, false,
                             ur->hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting unballoon msg, expect problems",
                     ret);
    }
}
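
/*
 * Lazily sets up the hot-add range ("our range") backed by the configured
 * memdev on the first hot add.  The setup is refused if the guest has
 * already reported removed pages falling inside that range, since the
 * removal bookkeeping would then be inconsistent.
 */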
static bool hv_balloon_our_range_ensure(HvBalloon *balloon)
{
    uint64_t align;
    MemoryRegion *hostmem_mr;
    g_autoptr(OurRangeMemslots) our_range_memslots = NULL;
    OurRange *our_range;

    if (balloon->our_range) {
        return true;
    }

    if (!balloon->hostmem) {
        return false;
    }

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) * MiB;
    assert(QEMU_IS_ALIGNED(balloon->addr, align));

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);

    our_range_memslots = hvb_our_range_memslots_new(balloon->addr,
                                                    balloon->mr, hostmem_mr,
                                                    OBJECT(balloon),
                                                    balloon->memslot_count,
                                                    balloon->memslot_size);
    our_range = OUR_RANGE(our_range_memslots);

    if (hvb_page_range_tree_intree_any(balloon->removed_guest,
                                       our_range->range.start,
                                       our_range->range.count) ||
        hvb_page_range_tree_intree_any(balloon->removed_both,
                                       our_range->range.start,
                                       our_range->range.count)) {
        error_report("some parts of the memory backend were already returned by the guest. this should not happen, please reboot the guest and try again");
        return false;
    }

    trace_hv_balloon_our_range_add(our_range->range.count,
                                   our_range->range.start);

    balloon->our_range = g_steal_pointer(&our_range_memslots);
    return true;
}

static void hv_balloon_hot_add_setup(HvBalloon *balloon, StateDesc *stdesc)
{
    /* need to make copy since it is in union with hot_add_range */
    uint64_t hot_add_diff = balloon->hot_add_diff;
    PageRange *hot_add_range = &balloon->hot_add_range;
    uint64_t align, our_range_remaining;
    OurRange *our_range;

    assert(balloon->state == S_HOT_ADD_SETUP);
    assert(hot_add_diff > 0);

    if (!hv_balloon_our_range_ensure(balloon)) {
        goto ret_idle;
    }

    our_range = OUR_RANGE(balloon->our_range);

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
        (MiB / HV_BALLOON_PAGE_SIZE);

    /* Absolute GPA in pages */
    hot_add_range->start = our_range_get_remaining_start(our_range);
    assert(QEMU_IS_ALIGNED(hot_add_range->start, align));

    our_range_remaining = our_range_get_remaining_size(our_range);
    hot_add_range->count = MIN(our_range_remaining, hot_add_diff);
    hot_add_range->count = QEMU_ALIGN_DOWN(hot_add_range->count, align);
    if (hot_add_range->count == 0) {
        goto ret_idle;
    }

    hvb_our_range_memslots_ensure_mapped_additional(balloon->our_range,
                                                    hot_add_range->count);

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
    return;

ret_idle:
    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
}

static void hv_balloon_hot_add_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_hot_add_with_region *ha;
    size_t ha_size = sizeof(*ha);

    assert(balloon->state == S_HOT_ADD_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, ha_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_POSTING);
}

static void hv_balloon_hot_add_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    PageRange *hot_add_range = &balloon->hot_add_range;
    uint64_t *current_count = &balloon->ha_current_count;
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    g_autofree struct dm_hot_add_with_region *ha = NULL;
    size_t ha_size = sizeof(*ha);
    union dm_mem_page_range *ha_region;
    uint64_t align, chunk_max_size;
    ssize_t ret;

    assert(balloon->state == S_HOT_ADD_POSTING);
    assert(hot_add_range->count > 0);

    align = (1 << balloon->caps.cap_bits.hot_add_alignment) *
        (MiB / HV_BALLOON_PAGE_SIZE);
    if (align >= HV_BALLOON_HA_CHUNK_PAGES) {
        /*
         * If the required alignment is higher than the chunk size we let it
         * override that size.
         */
        chunk_max_size = align;
    } else {
        chunk_max_size = QEMU_ALIGN_DOWN(HV_BALLOON_HA_CHUNK_PAGES, align);
    }

    /*
     * hot_add_range->count starts aligned in hv_balloon_hot_add_setup(),
     * then it is either reduced by subtracting aligned current_count or
     * further hot-adds are prevented by marking the whole remaining our range
     * as unusable in hv_balloon_handle_hot_add_response().
     */
    *current_count = MIN(hot_add_range->count, chunk_max_size);

    ha = g_malloc0(ha_size);
    ha_region = &ha->region;
    ha->hdr.type = DM_MEM_HOT_ADD_REQUEST;
    ha->hdr.size = ha_size;
    ha->hdr.trans_id = balloon->trans_id;

    ha->range.finfo.start_page = hot_add_range->start;
    ha->range.finfo.page_cnt = *current_count;
    ha_region->finfo.start_page = hot_add_range->start;
    ha_region->finfo.page_cnt = ha->range.finfo.page_cnt;

    trace_hv_balloon_outgoing_hot_add(ha->hdr.trans_id,
                                      *current_count, hot_add_range->start);

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, ha, ha_size, false,
                             ha->hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting hot add msg, expect problems",
                     ret);
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_REPLY_WAIT);
}

static void hv_balloon_balloon_rb_wait(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    size_t bl_size = sizeof(struct dm_balloon);

    assert(balloon->state == S_BALLOON_RB_WAIT);

    if (vmbus_channel_reserve(chan, 0, bl_size) < 0) {
        return;
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_POSTING);
}

static void hv_balloon_balloon_posting(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan = hv_balloon_get_channel(balloon);
    struct dm_balloon bl;
    size_t bl_size = sizeof(bl);
    ssize_t ret;

    assert(balloon->state == S_BALLOON_POSTING);
    assert(balloon->balloon_diff > 0);

    memset(&bl, 0, sizeof(bl));
    bl.hdr.type = DM_BALLOON_REQUEST;
    bl.hdr.size = bl_size;
    bl.hdr.trans_id = balloon->trans_id;
    bl.num_pages = MIN(balloon->balloon_diff, HV_BALLOON_HR_CHUNK_PAGES);

    trace_hv_balloon_outgoing_balloon(bl.hdr.trans_id, bl.num_pages,
                                      balloon->balloon_diff);

    ret = vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND,
                             NULL, 0, &bl, bl_size, false,
                             bl.hdr.trans_id);
    if (ret <= 0) {
        error_report("error %zd when posting balloon msg, expect problems",
                     ret);
    }

    HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_REPLY_WAIT);
}
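
/*
 * Computes what has to be done to reach the current target size:
 * if the target is above the effective guest size (total RAM minus what is
 * currently removed) the difference is first covered by unballooning
 * already-removed pages and only then by hot adding new memory (if the
 * guest advertised the hot-add capability); if the target is below the
 * effective size, a balloon (removal) request is started instead.
 */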
static void hv_balloon_idle_state_process_target(HvBalloon *balloon,
                                                 StateDesc *stdesc)
{
    bool can_balloon = balloon->caps.cap_bits.balloon;
    uint64_t ram_size_pages, total_removed;

    ram_size_pages = hv_balloon_total_ram(balloon);
    total_removed = hv_balloon_total_removed_rs(balloon, ram_size_pages);

    /*
     * we need to cache the values computed from the balloon target value when
     * starting the adjustment procedure in case someone changes the target when
     * the procedure is in progress
     */
    if (balloon->target > ram_size_pages - total_removed) {
        bool can_hot_add = balloon->caps.cap_bits.hot_add;
        uint64_t target_diff = balloon->target -
            (ram_size_pages - total_removed);

        balloon->unballoon_diff = MIN(target_diff, total_removed);

        if (can_hot_add) {
            balloon->hot_add_diff = target_diff - balloon->unballoon_diff;
        } else {
            balloon->hot_add_diff = 0;
        }

        if (balloon->unballoon_diff > 0) {
            assert(can_balloon);
            HV_BALLOON_STATE_DESC_SET(stdesc, S_UNBALLOON_RB_WAIT);
        } else if (balloon->hot_add_diff > 0) {
            HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
        }
    } else if (can_balloon &&
               balloon->target < ram_size_pages - total_removed) {
        balloon->balloon_diff = ram_size_pages - total_removed -
            balloon->target;
        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
    }
}

static void hv_balloon_idle_state(HvBalloon *balloon,
                                  StateDesc *stdesc)
{
    assert(balloon->state == S_IDLE);

    if (balloon->target_changed) {
        balloon->target_changed = false;
        hv_balloon_idle_state_process_target(balloon, stdesc);
        return;
    }
}

static const struct {
    void (*handler)(HvBalloon *balloon, StateDesc *stdesc);
} state_handlers[] = {
    [S_IDLE].handler = hv_balloon_idle_state,
    [S_BALLOON_POSTING].handler = hv_balloon_balloon_posting,
    [S_BALLOON_RB_WAIT].handler = hv_balloon_balloon_rb_wait,
    [S_UNBALLOON_POSTING].handler = hv_balloon_unballoon_posting,
    [S_UNBALLOON_RB_WAIT].handler = hv_balloon_unballoon_rb_wait,
    [S_HOT_ADD_SETUP].handler = hv_balloon_hot_add_setup,
    [S_HOT_ADD_RB_WAIT].handler = hv_balloon_hot_add_rb_wait,
    [S_HOT_ADD_POSTING].handler = hv_balloon_hot_add_posting,
};

static void hv_balloon_handle_state(HvBalloon *balloon, StateDesc *stdesc)
{
    if (balloon->state >= ARRAY_SIZE(state_handlers) ||
        !state_handlers[balloon->state].handler) {
        return;
    }

    state_handlers[balloon->state].handler(balloon, stdesc);
}
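
/*
 * Helpers for accounting pages the guest reports as removed in its balloon
 * responses.  Newly removed ranges are inserted into the appropriate page
 * range tree; only pages that were not already present there ("effcount")
 * are added to the grand-total removed counter, the per-response counter
 * and, optionally, a per-call counter.
 */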
static void hv_balloon_remove_response_insert_range(PageRangeTree tree,
                                                    const PageRange *range,
                                                    uint64_t *ctr1,
                                                    uint64_t *ctr2,
                                                    uint64_t *ctr3)
{
    uint64_t dupcount, effcount;

    if (range->count == 0) {
        return;
    }

    dupcount = 0;
    hvb_page_range_tree_insert(tree, range->start, range->count, &dupcount);

    assert(dupcount <= range->count);
    effcount = range->count - dupcount;

    *ctr1 += effcount;
    *ctr2 += effcount;
    if (ctr3) {
        *ctr3 += effcount;
    }
}

static void hv_balloon_remove_response_handle_range(HvBalloon *balloon,
                                                    PageRange *range,
                                                    bool both,
                                                    uint64_t *removedctr)
{
    OurRange *our_range = OUR_RANGE(balloon->our_range);
    PageRangeTree globaltree =
        both ? balloon->removed_both : balloon->removed_guest;
    uint64_t *globalctr =
        both ? &balloon->removed_both_ctr : &balloon->removed_guest_ctr;
    PageRange rangeeff;

    if (range->count == 0) {
        return;
    }

    trace_hv_balloon_remove_response(range->count, range->start, both);

    if (our_range) {
        /* Includes the not-yet-hot-added and unusable parts. */
        rangeeff = our_range->range;
    } else {
        rangeeff.start = rangeeff.count = 0;
    }

    if (page_range_intersection_size(range, rangeeff.start, rangeeff.count) > 0) {
        PageRangeTree ourtree = our_range_get_removed_tree(our_range, both);
        PageRange rangehole, rangecommon;
        uint64_t ourremoved = 0;

        /* process the hole before our range, if it exists */
        page_range_part_before(range, rangeeff.start, &rangehole);
        hv_balloon_remove_response_insert_range(globaltree, &rangehole,
                                                globalctr, removedctr, NULL);
        if (rangehole.count > 0) {
            trace_hv_balloon_remove_response_hole(rangehole.count,
                                                  rangehole.start,
                                                  range->count, range->start,
                                                  rangeeff.start, both);
        }

        /* process our part */
        page_range_intersect(range, rangeeff.start, rangeeff.count,
                             &rangecommon);
        hv_balloon_remove_response_insert_range(ourtree, &rangecommon,
                                                globalctr, removedctr,
                                                &ourremoved);
        if (rangecommon.count > 0) {
            trace_hv_balloon_remove_response_common(rangecommon.count,
                                                    rangecommon.start,
                                                    range->count, range->start,
                                                    rangeeff.count,
                                                    rangeeff.start, ourremoved,
                                                    both);
        }

        /* calculate what's left after our range */
        rangecommon = *range;
        page_range_part_after(&rangecommon, rangeeff.start, rangeeff.count,
                              range);
    }

    /* process the remainder of the range that lies after our range */
    if (range->count > 0) {
        hv_balloon_remove_response_insert_range(globaltree, range,
                                                globalctr, removedctr, NULL);
        trace_hv_balloon_remove_response_remainder(range->count, range->start,
                                                   both);
        range->count = 0;
    }
}

static void hv_balloon_remove_response_handle_pages(HvBalloon *balloon,
                                                    PageRange *range,
                                                    uint64_t start,
                                                    uint64_t count,
                                                    bool both,
                                                    uint64_t *removedctr)
{
    assert(count > 0);

    /*
     * if there is an existing range that the new range can't be joined to
     * dump it into tree(s)
     */
    if (range->count > 0 && !page_range_joinable(range, start, count)) {
        hv_balloon_remove_response_handle_range(balloon, range, both,
                                                removedctr);
    }

    if (range->count == 0) {
        range->start = start;
        range->count = count;
    } else if (page_range_joinable_left(range, start, count)) {
        range->start = start;
        range->count += count;
    } else { /* page_range_joinable_right() */
        range->count += count;
    }
}
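
/*
 * Called for each page range in the removed-host-address tree: translates
 * the range back to a host virtual address, locates the owning RAM block
 * and discards the backing host memory so the pages the guest gave up
 * actually get released on the host side.
 */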
static gboolean hv_balloon_handle_remove_host_addr_node(gpointer key,
                                                        gpointer value,
                                                        gpointer data)
{
    PageRange *range = value;
    uint64_t pageoff;

    for (pageoff = 0; pageoff < range->count; ) {
        uint64_t addr_64 = (range->start + pageoff) * HV_BALLOON_PAGE_SIZE;
        void *addr;
        RAMBlock *rb;
        ram_addr_t rb_offset;
        size_t rb_page_size;
        size_t discard_size;

        assert(addr_64 <= UINTPTR_MAX);
        addr = (void *)((uintptr_t)addr_64);
        rb = qemu_ram_block_from_host(addr, false, &rb_offset);
        rb_page_size = qemu_ram_pagesize(rb);

        if (rb_page_size != HV_BALLOON_PAGE_SIZE) {
            /* TODO: these should end in "removed_guest" */
            warn_report("guest reported removed page backed by unsupported page size %zu",
                        rb_page_size);
            pageoff++;
            continue;
        }

        discard_size = MIN(range->count - pageoff,
                           (rb->max_length - rb_offset) /
                           HV_BALLOON_PAGE_SIZE);
        discard_size = MAX(discard_size, 1);

        if (ram_block_discard_range(rb, rb_offset, discard_size *
                                    HV_BALLOON_PAGE_SIZE) != 0) {
            warn_report("guest reported removed page failed discard");
        }

        pageoff += discard_size;
    }

    return false;
}

static void hv_balloon_handle_remove_host_addr_tree(PageRangeTree tree)
{
    g_tree_foreach(tree.t, hv_balloon_handle_remove_host_addr_node, NULL);
}

static int hv_balloon_handle_remove_section(PageRangeTree tree,
                                            const MemoryRegionSection *section,
                                            uint64_t count)
{
    void *addr = memory_region_get_ram_ptr(section->mr) +
        section->offset_within_region;
    uint64_t addr_page;

    assert(count > 0);

    if ((uintptr_t)addr % HV_BALLOON_PAGE_SIZE) {
        warn_report("guest reported removed pages at an unaligned host addr %p",
                    addr);
        return -EINVAL;
    }

    addr_page = (uintptr_t)addr / HV_BALLOON_PAGE_SIZE;
    hvb_page_range_tree_insert(tree, addr_page, count, NULL);

    return 0;
}
static void hv_balloon_handle_remove_ranges(HvBalloon *balloon,
                                            union dm_mem_page_range ranges[],
                                            uint32_t count)
{
    uint64_t removedcnt;
    PageRangeTree removed_host_addr;
    PageRange range_guest, range_both;

    hvb_page_range_tree_init(&removed_host_addr);
    range_guest.count = range_both.count = removedcnt = 0;
    for (unsigned int ctr = 0; ctr < count; ctr++) {
        union dm_mem_page_range *mr = &ranges[ctr];
        hwaddr pa;
        MemoryRegionSection section;

        for (unsigned int offset = 0; offset < mr->finfo.page_cnt; ) {
            int ret;
            uint64_t pageno = mr->finfo.start_page + offset;
            uint64_t pagecnt = 1;

            pa = (hwaddr)pageno << HV_BALLOON_PFN_SHIFT;
            section = memory_region_find(get_system_memory(), pa,
                                         (mr->finfo.page_cnt - offset) *
                                         HV_BALLOON_PAGE_SIZE);
            if (!section.mr) {
                warn_report("guest reported removed page %"PRIu64" not found in RAM",
                            pageno);
                ret = -EINVAL;
                goto finish_page;
            }

            pagecnt = int128_get64(section.size) / HV_BALLOON_PAGE_SIZE;
            if (pagecnt <= 0) {
                warn_report("guest reported removed page %"PRIu64" in a section smaller than page size",
                            pageno);
                pagecnt = 1; /* skip the whole page */
                ret = -EINVAL;
                goto finish_page;
            }

            if (!memory_region_is_ram(section.mr) ||
                memory_region_is_rom(section.mr) ||
                memory_region_is_romd(section.mr)) {
                warn_report("guest reported removed page %"PRIu64" in a section that is not an ordinary RAM",
                            pageno);
                ret = -EINVAL;
                goto finish_page;
            }

            ret = hv_balloon_handle_remove_section(removed_host_addr, &section,
                                                   pagecnt);

finish_page:
            if (ret == 0) {
                hv_balloon_remove_response_handle_pages(balloon,
                                                        &range_both,
                                                        pageno, pagecnt,
                                                        true, &removedcnt);
            } else {
                hv_balloon_remove_response_handle_pages(balloon,
                                                        &range_guest,
                                                        pageno, pagecnt,
                                                        false, &removedcnt);
            }

            if (section.mr) {
                memory_region_unref(section.mr);
            }

            offset += pagecnt;
        }
    }

    hv_balloon_remove_response_handle_range(balloon, &range_both, true,
                                            &removedcnt);
    hv_balloon_remove_response_handle_range(balloon, &range_guest, false,
                                            &removedcnt);

    hv_balloon_handle_remove_host_addr_tree(removed_host_addr);
    hvb_page_range_tree_destroy(&removed_host_addr);

    if (removedcnt > balloon->balloon_diff) {
        warn_report("guest reported more pages removed than currently pending (%"PRIu64" vs %"PRIu64")",
                    removedcnt, balloon->balloon_diff);
        balloon->balloon_diff = 0;
    } else {
        balloon->balloon_diff -= removedcnt;
    }
}

static bool hv_balloon_handle_msg_size(HvBalloonReq *req, size_t minsize,
                                       const char *msgname)
{
    VMBusChanReq *vmreq = &req->vmreq;
    uint32_t msglen = vmreq->msglen;

    if (msglen >= minsize) {
        return true;
    }

    warn_report("%s message too short (%u vs %zu), ignoring", msgname,
                (unsigned int)msglen, minsize);
    return false;
}

static void hv_balloon_handle_version_request(HvBalloon *balloon,
                                              HvBalloonReq *req,
                                              StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_version_request *msgVr = vmreq->msg;
    struct dm_version_response respVr;

    if (balloon->state != S_VERSION) {
        warn_report("unexpected DM_VERSION_REQUEST in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgVr),
                                    "DM_VERSION_REQUEST")) {
        return;
    }

    trace_hv_balloon_incoming_version(msgVr->version.major_version,
                                      msgVr->version.minor_version);

    memset(&respVr, 0, sizeof(respVr));
    respVr.hdr.type = DM_VERSION_RESPONSE;
    respVr.hdr.size = sizeof(respVr);
    respVr.hdr.trans_id = msgVr->hdr.trans_id;
    respVr.is_accepted = msgVr->version.version >= DYNMEM_PROTOCOL_VERSION_1 &&
        msgVr->version.version <= DYNMEM_PROTOCOL_VERSION_3;

    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respVr);

    if (respVr.is_accepted) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_CAPS);
    }
}

static void hv_balloon_handle_caps_report(HvBalloon *balloon,
                                          HvBalloonReq *req,
                                          StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_capabilities *msgCap = vmreq->msg;
    struct dm_capabilities_resp_msg respCap;

    if (balloon->state != S_CAPS) {
        warn_report("unexpected DM_CAPABILITIES_REPORT in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgCap),
                                    "DM_CAPABILITIES_REPORT")) {
        return;
    }

    trace_hv_balloon_incoming_caps(msgCap->caps.caps);
    balloon->caps = msgCap->caps;

    memset(&respCap, 0, sizeof(respCap));
    respCap.hdr.type = DM_CAPABILITIES_RESPONSE;
    respCap.hdr.size = sizeof(respCap);
    respCap.hdr.trans_id = msgCap->hdr.trans_id;
    respCap.is_accepted = 1;
    respCap.hot_remove = 1;
    respCap.suppress_pressure_reports = !balloon->status_report.enabled;
    hv_balloon_send_packet(vmreq->chan, (struct dm_message *)&respCap);

    timer_mod(&balloon->post_init_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
              HV_BALLOON_POST_INIT_WAIT);

    HV_BALLOON_STATE_DESC_SET(stdesc, S_POST_INIT_WAIT);
}
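
/*
 * Guest memory status reports are only stored and forwarded (as a QMP event
 * via qapi_event_send_hv_balloon_status_report()) when the "status-report"
 * device property is enabled; otherwise the guest is asked to suppress them
 * in the capabilities response above and any stray report is ignored here.
 */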
static void hv_balloon_handle_status_report(HvBalloon *balloon,
                                            HvBalloonReq *req)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_status *msgStatus = vmreq->msg;

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgStatus),
                                    "DM_STATUS_REPORT")) {
        return;
    }

    if (!balloon->status_report.enabled) {
        return;
    }

    balloon->status_report.committed = msgStatus->num_committed;
    balloon->status_report.committed *= HV_BALLOON_PAGE_SIZE;
    balloon->status_report.available = msgStatus->num_avail;
    balloon->status_report.available *= HV_BALLOON_PAGE_SIZE;
    balloon->status_report.received = true;

    qapi_event_send_hv_balloon_status_report(balloon->status_report.committed,
                                             balloon->status_report.available);
}

HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
{
    HvBalloon *balloon;
    HvBalloonInfo *info;

    balloon = HV_BALLOON(object_resolve_path_type("", TYPE_HV_BALLOON, NULL));
    if (!balloon) {
        error_setg(errp, "no %s device present", TYPE_HV_BALLOON);
        return NULL;
    }

    if (!balloon->status_report.enabled) {
        error_setg(errp, "guest memory status reporting not enabled");
        return NULL;
    }

    if (!balloon->status_report.received) {
        error_setg(errp, "no guest memory status report received yet");
        return NULL;
    }

    info = g_malloc0(sizeof(*info));
    info->committed = balloon->status_report.committed;
    info->available = balloon->status_report.available;
    return info;
}

static void hv_balloon_handle_unballoon_response(HvBalloon *balloon,
                                                 HvBalloonReq *req,
                                                 StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_unballoon_response *msgUrR = vmreq->msg;

    if (balloon->state != S_UNBALLOON_REPLY_WAIT) {
        warn_report("unexpected DM_UNBALLOON_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgUrR),
                                    "DM_UNBALLOON_RESPONSE"))
        return;

    trace_hv_balloon_incoming_unballoon(msgUrR->hdr.trans_id);

    balloon->trans_id++;

    if (balloon->hot_add_diff > 0) {
        bool can_hot_add = balloon->caps.cap_bits.hot_add;

        assert(can_hot_add);
        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_SETUP);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
    }
}

static void hv_balloon_handle_hot_add_response(HvBalloon *balloon,
                                               HvBalloonReq *req,
                                               StateDesc *stdesc)
{
    PageRange *hot_add_range = &balloon->hot_add_range;
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_hot_add_response *msgHaR = vmreq->msg;
    OurRange *our_range;

    if (balloon->state != S_HOT_ADD_REPLY_WAIT) {
        warn_report("unexpected DM_HOT_ADD_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    assert(balloon->our_range);
    our_range = OUR_RANGE(balloon->our_range);

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgHaR),
                                    "DM_HOT_ADD_RESPONSE"))
        return;

    trace_hv_balloon_incoming_hot_add(msgHaR->hdr.trans_id, msgHaR->result,
                                      msgHaR->page_count);

    balloon->trans_id++;

    if (msgHaR->result) {
        if (msgHaR->page_count > balloon->ha_current_count) {
            warn_report("DM_HOT_ADD_RESPONSE page count higher than requested (%"PRIu32" vs %"PRIu64")",
                        msgHaR->page_count, balloon->ha_current_count);
            msgHaR->page_count = balloon->ha_current_count;
        }

        hvb_our_range_mark_added(our_range, msgHaR->page_count);
        hot_add_range->start += msgHaR->page_count;
        hot_add_range->count -= msgHaR->page_count;
    }

    if (!msgHaR->result || msgHaR->page_count < balloon->ha_current_count) {
        /*
         * the current planned range was only partially hot-added, take note
         * how much of it remains and don't attempt any further hot adds
         */
        our_range_mark_remaining_unusable(our_range);

        goto ret_idle;
    }

    /* any pages remaining to hot-add in our range? */
    if (hot_add_range->count > 0) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_HOT_ADD_RB_WAIT);
        return;
    }

ret_idle:
    HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
}

static void hv_balloon_handle_balloon_response(HvBalloon *balloon,
                                               HvBalloonReq *req,
                                               StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_balloon_response *msgBR = vmreq->msg;

    if (balloon->state != S_BALLOON_REPLY_WAIT) {
        warn_report("unexpected DM_BALLOON_RESPONSE in %d state",
                    balloon->state);
        return;
    }

    if (!hv_balloon_handle_msg_size(req, sizeof(*msgBR),
                                    "DM_BALLOON_RESPONSE"))
        return;

    trace_hv_balloon_incoming_balloon(msgBR->hdr.trans_id, msgBR->range_count,
                                      msgBR->more_pages);

    if (vmreq->msglen < sizeof(*msgBR) +
        (uint64_t)sizeof(msgBR->range_array[0]) * msgBR->range_count) {
        warn_report("DM_BALLOON_RESPONSE too short for the range count");
        return;
    }

    if (msgBR->range_count == 0) {
        /* The guest is already at its minimum size */
        balloon->balloon_diff = 0;
        goto ret_end_trans;
    } else {
        hv_balloon_handle_remove_ranges(balloon,
                                        msgBR->range_array,
                                        msgBR->range_count);
    }

    /* More responses expected? */
    if (msgBR->more_pages) {
        return;
    }

ret_end_trans:
    balloon->trans_id++;

    if (balloon->balloon_diff > 0) {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_BALLOON_RB_WAIT);
    } else {
        HV_BALLOON_STATE_DESC_SET(stdesc, S_IDLE);
    }
}

static void hv_balloon_handle_packet(HvBalloon *balloon, HvBalloonReq *req,
                                     StateDesc *stdesc)
{
    VMBusChanReq *vmreq = &req->vmreq;
    struct dm_message *msg = vmreq->msg;

    if (vmreq->msglen < sizeof(msg->hdr)) {
        return;
    }

    switch (msg->hdr.type) {
    case DM_VERSION_REQUEST:
        hv_balloon_handle_version_request(balloon, req, stdesc);
        break;

    case DM_CAPABILITIES_REPORT:
        hv_balloon_handle_caps_report(balloon, req, stdesc);
        break;

    case DM_STATUS_REPORT:
        hv_balloon_handle_status_report(balloon, req);
        break;

    case DM_MEM_HOT_ADD_RESPONSE:
        hv_balloon_handle_hot_add_response(balloon, req, stdesc);
        break;

    case DM_UNBALLOON_RESPONSE:
        hv_balloon_handle_unballoon_response(balloon, req, stdesc);
        break;

    case DM_BALLOON_RESPONSE:
        hv_balloon_handle_balloon_response(balloon, req, stdesc);
        break;

    default:
        warn_report("unknown DM message %u", msg->hdr.type);
        break;
    }
}

static bool hv_balloon_recv_channel(HvBalloon *balloon, StateDesc *stdesc)
{
    VMBusChannel *chan;
    HvBalloonReq *req;

    if (balloon->state == S_WAIT_RESET ||
        balloon->state == S_POST_RESET_CLOSED) {
        return false;
    }

    chan = hv_balloon_get_channel(balloon);
    if (vmbus_channel_recv_start(chan)) {
        return false;
    }

    while ((req = vmbus_channel_recv_peek(chan, sizeof(*req)))) {
        hv_balloon_handle_packet(balloon, req, stdesc);
        vmbus_free_req(req);
        vmbus_channel_recv_pop(chan);

        if (stdesc->state != S_NO_CHANGE) {
            break;
        }
    }

    return vmbus_channel_recv_done(chan) > 0;
}
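
/*
 * The "event loop" below alternates between running the handler for the
 * current state and draining incoming VMBus messages, repeating as long as
 * either step produced a state change (or any message was received), so that
 * chained transitions are processed in one go.
 */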
/* old state handler -> new state transition (potential) */
static bool hv_balloon_event_loop_state(HvBalloon *balloon)
{
    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;

    hv_balloon_handle_state(balloon, &state_new);
    return hv_balloon_state_set(balloon, state_new.state, state_new.desc);
}

/* VMBus message -> new state transition (potential) */
static bool hv_balloon_event_loop_recv(HvBalloon *balloon)
{
    StateDesc state_new = HV_BALLOON_STATE_DESC_INIT;
    bool any_recv, state_changed;

    any_recv = hv_balloon_recv_channel(balloon, &state_new);
    state_changed = hv_balloon_state_set(balloon,
                                         state_new.state, state_new.desc);

    return state_changed || any_recv;
}

static void hv_balloon_event_loop(HvBalloon *balloon)
{
    bool state_repeat, recv_repeat;

    do {
        state_repeat = hv_balloon_event_loop_state(balloon);
        recv_repeat = hv_balloon_event_loop_recv(balloon);
    } while (state_repeat || recv_repeat);
}

static void hv_balloon_vmdev_chan_notify(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    hv_balloon_event_loop(balloon);
}

static void hv_balloon_stat(void *opaque, BalloonInfo *info)
{
    HvBalloon *balloon = opaque;
    info->actual = (hv_balloon_total_ram(balloon) - balloon->removed_both_ctr)
        << HV_BALLOON_PFN_SHIFT;
}

static void hv_balloon_to_target(void *opaque, ram_addr_t target)
{
    HvBalloon *balloon = opaque;
    uint64_t target_pages = target >> HV_BALLOON_PFN_SHIFT;

    if (!target_pages) {
        return;
    }

    /*
     * always set target_changed, even with unchanged target, as the user
     * might be asking us to try again reaching it
     */
    balloon->target = target_pages;
    balloon->target_changed = true;

    hv_balloon_event_loop(balloon);
}

static int hv_balloon_vmdev_open_channel(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    if (balloon->state != S_POST_RESET_CLOSED) {
        warn_report("guest trying to open a DM channel in invalid %d state",
                    balloon->state);
        return -EINVAL;
    }

    HV_BALLOON_SET_STATE(balloon, S_VERSION);
    hv_balloon_event_loop(balloon);

    return 0;
}

static void hv_balloon_vmdev_close_channel(VMBusChannel *chan)
{
    HvBalloon *balloon = HV_BALLOON(vmbus_channel_device(chan));

    timer_del(&balloon->post_init_timer);

    /* Don't report stale data */
    balloon->status_report.received = false;

    HV_BALLOON_SET_STATE(balloon, S_WAIT_RESET);
    hv_balloon_event_loop(balloon);
}

static void hv_balloon_post_init_timer(void *opaque)
{
    HvBalloon *balloon = opaque;

    if (balloon->state != S_POST_INIT_WAIT) {
        return;
    }

    HV_BALLOON_SET_STATE(balloon, S_IDLE);
    hv_balloon_event_loop(balloon);
}

static void hv_balloon_system_reset_unrealize_common(HvBalloon *balloon)
{
    g_clear_pointer(&balloon->our_range, hvb_our_range_memslots_free);
}

static void hv_balloon_system_reset(void *opaque)
{
    HvBalloon *balloon = HV_BALLOON(opaque);

    hv_balloon_system_reset_unrealize_common(balloon);
}
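
/*
 * The device's memory region is created on demand (and only when a memdev is
 * configured): it is sized and aligned like the backend's region and is what
 * gets exposed through the memory device interface, which is how the memory
 * device core picks and assigns the "addr" base for hot-added memory.
 */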
static void hv_balloon_ensure_mr(HvBalloon *balloon)
{
    MemoryRegion *hostmem_mr;

    assert(balloon->hostmem);

    if (balloon->mr) {
        return;
    }

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);

    balloon->mr = g_new0(MemoryRegion, 1);
    memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
                       memory_region_size(hostmem_mr));
    balloon->mr->align = memory_region_get_alignment(hostmem_mr);
}

static void hv_balloon_free_mr(HvBalloon *balloon)
{
    if (!balloon->mr) {
        return;
    }

    object_unparent(OBJECT(balloon->mr));
    g_clear_pointer(&balloon->mr, g_free);
}

static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
{
    ERRP_GUARD();
    HvBalloon *balloon = HV_BALLOON(vdev);
    int ret;

    balloon->state = S_WAIT_RESET;

    ret = qemu_add_balloon_handler(hv_balloon_to_target, hv_balloon_stat,
                                   balloon);
    if (ret < 0) {
        /* This also protects against having multiple hv-balloon instances */
        error_setg(errp, "Only one balloon device is supported");
        return;
    }

    if (balloon->hostmem) {
        if (host_memory_backend_is_mapped(balloon->hostmem)) {
            Object *obj = OBJECT(balloon->hostmem);

            error_setg(errp, "'%s' property specifies a busy memdev: %s",
                       HV_BALLOON_MEMDEV_PROP,
                       object_get_canonical_path_component(obj));
            goto out_balloon_handler;
        }

        hv_balloon_ensure_mr(balloon);

        /* This is rather unlikely to happen, but let's still check for it. */
        if (!QEMU_IS_ALIGNED(memory_region_size(balloon->mr),
                             HV_BALLOON_PAGE_SIZE)) {
            error_setg(errp, "'%s' property memdev size has to be a multiple of 0x%" PRIx64,
                       HV_BALLOON_MEMDEV_PROP, (uint64_t)HV_BALLOON_PAGE_SIZE);
            goto out_balloon_handler;
        }

        host_memory_backend_set_mapped(balloon->hostmem, true);
        vmstate_register_ram(host_memory_backend_get_memory(balloon->hostmem),
                             DEVICE(balloon));
    } else if (balloon->addr) {
        error_setg(errp, "'%s' property must not be set without a memdev",
                   HV_BALLOON_MEMDEV_PROP);
        goto out_balloon_handler;
    }

    timer_init_ms(&balloon->post_init_timer, QEMU_CLOCK_VIRTUAL,
                  hv_balloon_post_init_timer, balloon);

    qemu_register_reset(hv_balloon_system_reset, balloon);

    return;

out_balloon_handler:
    qemu_remove_balloon_handler(balloon);
}

/*
 * VMBus device reset has to be implemented in case the guest decides to
 * disconnect and reconnect to the VMBus without rebooting the whole system.
 *
 * However, the hot-added memory can't be removed here as Windows keeps on using
 * it until the system is restarted, even after disconnecting from the VMBus.
 */
static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
{
    HvBalloon *balloon = HV_BALLOON(vdev);

    if (balloon->state == S_POST_RESET_CLOSED) {
        return;
    }

    if (balloon->our_range) {
        hvb_our_range_clear_removed_trees(OUR_RANGE(balloon->our_range));
    }

    hvb_page_range_tree_destroy(&balloon->removed_guest);
    hvb_page_range_tree_destroy(&balloon->removed_both);
    hvb_page_range_tree_init(&balloon->removed_guest);
    hvb_page_range_tree_init(&balloon->removed_both);

    balloon->trans_id = 0;
    balloon->removed_guest_ctr = 0;
    balloon->removed_both_ctr = 0;

    HV_BALLOON_SET_STATE(balloon, S_POST_RESET_CLOSED);
    hv_balloon_event_loop(balloon);
}

/*
 * Clean up things that were (possibly) allocated pre-realization, for example
 * from memory_device_pre_plug(), so we don't leak them if the device doesn't
 * actually get realized in the end.
 */
static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
{
    hv_balloon_free_mr(balloon);
    balloon->addr = 0;

    balloon->memslot_count = 0;
}

static void hv_balloon_vmdev_unrealize(VMBusDevice *vdev)
{
    HvBalloon *balloon = HV_BALLOON(vdev);

    qemu_unregister_reset(hv_balloon_system_reset, balloon);

    hv_balloon_system_reset_unrealize_common(balloon);

    qemu_remove_balloon_handler(balloon);

    if (balloon->hostmem) {
        vmstate_unregister_ram(host_memory_backend_get_memory(balloon->hostmem),
                               DEVICE(balloon));
        host_memory_backend_set_mapped(balloon->hostmem, false);
    }

    hvb_page_range_tree_destroy(&balloon->removed_guest);
    hvb_page_range_tree_destroy(&balloon->removed_both);

    hv_balloon_unrealize_finalize_common(balloon);
}

static uint64_t hv_balloon_md_get_addr(const MemoryDeviceState *md)
{
    return object_property_get_uint(OBJECT(md), HV_BALLOON_ADDR_PROP,
                                    &error_abort);
}

static void hv_balloon_md_set_addr(MemoryDeviceState *md, uint64_t addr,
                                   Error **errp)
{
    object_property_set_uint(OBJECT(md), HV_BALLOON_ADDR_PROP, addr, errp);
}

static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md,
                                                     Error **errp)
{
    HvBalloon *balloon = HV_BALLOON(md);

    if (!balloon->hostmem) {
        return NULL;
    }

    hv_balloon_ensure_mr(balloon);

    return balloon->mr;
}

static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
{
    /*
     * The VM can indicate an alignment up to 32 GiB. Memory device core can
     * usually only handle/guarantee 1 GiB alignment. The user will have to
     * specify a larger maxmem eventually.
     *
     * The memory device core will warn the user in case maxmem might have to be
     * increased and will fail plugging the device if there is not sufficient
     * space after alignment.
     *
     * TODO: we could do the alignment ourselves in a slightly bigger region.
     * But this feels better, although the warning might be annoying. Maybe
     * we can optimize that in the future (e.g., with such a device on the
     * cmdline place/size the device memory region differently).
     */
    return 32 * GiB;
}

static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
                                           MemoryDeviceInfo *info)
{
    HvBalloonDeviceInfo *hi = g_new0(HvBalloonDeviceInfo, 1);
    const HvBalloon *balloon = HV_BALLOON(md);
    DeviceState *dev = DEVICE(md);

    if (dev->id) {
        hi->id = g_strdup(dev->id);
    }

    if (balloon->hostmem) {
        hi->memdev = object_get_canonical_path(OBJECT(balloon->hostmem));
        hi->memaddr = balloon->addr;
        hi->has_memaddr = true;
        hi->max_size = memory_region_size(balloon->mr);
        /* TODO: expose current provided size or something else? */
    } else {
        hi->max_size = 0;
    }

    info->u.hv_balloon.data = hi;
    info->type = MEMORY_DEVICE_INFO_KIND_HV_BALLOON;
}

static void hv_balloon_decide_memslots(MemoryDeviceState *md,
                                       unsigned int limit)
{
    HvBalloon *balloon = HV_BALLOON(md);
    MemoryRegion *hostmem_mr;
    uint64_t region_size, memslot_size, memslots;

    /* We're called exactly once, before realizing the device. */
    assert(!balloon->memslot_count);

    /* We should not be called if we don't have a memory backend */
    assert(balloon->hostmem);

    hostmem_mr = host_memory_backend_get_memory(balloon->hostmem);
    region_size = memory_region_size(hostmem_mr);

    assert(region_size > 0);
    memslot_size = QEMU_ALIGN_UP(region_size / limit,
                                 HV_BALLOON_HA_MEMSLOT_SIZE_ALIGN);
    memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;

    if (memslots > 1) {
        balloon->memslot_size = memslot_size;
    } else {
        balloon->memslot_size = region_size;
    }

    assert(memslots <= UINT_MAX);
    balloon->memslot_count = memslots;
}

static unsigned int hv_balloon_get_memslots(MemoryDeviceState *md)
{
    const HvBalloon *balloon = HV_BALLOON(md);

    /* We're called after setting the suggested limit. */
    assert(balloon->memslot_count > 0);

    return balloon->memslot_count;
}

static void hv_balloon_init(Object *obj)
{
}

static void hv_balloon_finalize(Object *obj)
{
    HvBalloon *balloon = HV_BALLOON(obj);

    hv_balloon_unrealize_finalize_common(balloon);
}

static const Property hv_balloon_properties[] = {
    DEFINE_PROP_BOOL("status-report", HvBalloon,
                     status_report.enabled, false),

    /* MEMORY_DEVICE props */
    DEFINE_PROP_LINK(HV_BALLOON_MEMDEV_PROP, HvBalloon, hostmem,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
    DEFINE_PROP_UINT64(HV_BALLOON_ADDR_PROP, HvBalloon, addr, 0),
};

static void hv_balloon_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VMBusDeviceClass *vdc = VMBUS_DEVICE_CLASS(klass);
    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);

    device_class_set_props(dc, hv_balloon_properties);
    qemu_uuid_parse(HV_BALLOON_GUID, &vdc->classid);
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);

    vdc->vmdev_realize = hv_balloon_vmdev_realize;
    vdc->vmdev_unrealize = hv_balloon_vmdev_unrealize;
    vdc->vmdev_reset = hv_balloon_vmdev_reset;
    vdc->open_channel = hv_balloon_vmdev_open_channel;
    vdc->close_channel = hv_balloon_vmdev_close_channel;
    vdc->chan_notify_cb = hv_balloon_vmdev_chan_notify;

    mdc->get_addr = hv_balloon_md_get_addr;
    mdc->set_addr = hv_balloon_md_set_addr;
    mdc->get_plugged_size = memory_device_get_region_size;
    mdc->get_memory_region = hv_balloon_md_get_memory_region;
    mdc->decide_memslots = hv_balloon_decide_memslots;
    mdc->get_memslots = hv_balloon_get_memslots;
    mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
    mdc->fill_device_info = hv_balloon_md_fill_device_info;
}