virtio-mem.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387
  1. /*
  2. * Virtio MEM device
  3. *
  4. * Copyright (C) 2020 Red Hat, Inc.
  5. *
  6. * Authors:
  7. * David Hildenbrand <david@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "qemu/iov.h"
  14. #include "qemu/cutils.h"
  15. #include "qemu/error-report.h"
  16. #include "qemu/units.h"
  17. #include "sysemu/numa.h"
  18. #include "sysemu/sysemu.h"
  19. #include "sysemu/reset.h"
  20. #include "hw/virtio/virtio.h"
  21. #include "hw/virtio/virtio-bus.h"
  22. #include "hw/virtio/virtio-access.h"
  23. #include "hw/virtio/virtio-mem.h"
  24. #include "qapi/error.h"
  25. #include "qapi/visitor.h"
  26. #include "exec/ram_addr.h"
  27. #include "migration/misc.h"
  28. #include "hw/boards.h"
  29. #include "hw/qdev-properties.h"
  30. #include CONFIG_DEVICES
  31. #include "trace.h"
/*
 * We only had legacy x86 guests that did not support
 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
 *
 * When this macro is defined, the shared-zeropage helper and the
 * "auto"/"off" handling of the unplugged-inaccessible setting during
 * realize are compiled in; otherwise realize always forces the setting on.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif
/*
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 *
 * This value also serves as the initial fallback THP size and as the lower
 * bound of the default block size for backends whose page size differs from
 * the host page size.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
  44. static uint32_t virtio_mem_default_thp_size(void)
  45. {
  46. uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
  47. #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
  48. default_thp_size = 2 * MiB;
  49. #elif defined(__aarch64__)
  50. if (qemu_real_host_page_size() == 4 * KiB) {
  51. default_thp_size = 2 * MiB;
  52. } else if (qemu_real_host_page_size() == 16 * KiB) {
  53. default_thp_size = 32 * MiB;
  54. } else if (qemu_real_host_page_size() == 64 * KiB) {
  55. default_thp_size = 512 * MiB;
  56. }
  57. #endif
  58. return default_thp_size;
  59. }
  60. /*
  61. * We want to have a reasonable default block size such that
  62. * 1. We avoid splitting THPs when unplugging memory, which degrades
  63. * performance.
  64. * 2. We avoid placing THPs for plugged blocks that also cover unplugged
  65. * blocks.
  66. *
  67. * The actual THP size might differ between Linux kernels, so we try to probe
  68. * it. In the future (if we ever run into issues regarding 2.), we might want
  69. * to disable THP in case we fail to properly probe the THP size, or if the
  70. * block size is configured smaller than the THP size.
  71. */
  72. static uint32_t thp_size;
  73. #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
  74. static uint32_t virtio_mem_thp_size(void)
  75. {
  76. gchar *content = NULL;
  77. const char *endptr;
  78. uint64_t tmp;
  79. if (thp_size) {
  80. return thp_size;
  81. }
  82. /*
  83. * Try to probe the actual THP size, fallback to (sane but eventually
  84. * incorrect) default sizes.
  85. */
  86. if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
  87. !qemu_strtou64(content, &endptr, 0, &tmp) &&
  88. (!endptr || *endptr == '\n')) {
  89. /* Sanity-check the value and fallback to something reasonable. */
  90. if (!tmp || !is_power_of_2(tmp)) {
  91. warn_report("Read unsupported THP size: %" PRIx64, tmp);
  92. } else {
  93. thp_size = tmp;
  94. }
  95. }
  96. if (!thp_size) {
  97. thp_size = virtio_mem_default_thp_size();
  98. warn_report("Could not detect THP size, falling back to %" PRIx64
  99. " MiB.", thp_size / MiB);
  100. }
  101. g_free(content);
  102. return thp_size;
  103. }
  104. static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
  105. {
  106. const uint64_t page_size = qemu_ram_pagesize(rb);
  107. /* We can have hugetlbfs with a page size smaller than the THP size. */
  108. if (page_size == qemu_real_host_page_size()) {
  109. return MAX(page_size, virtio_mem_thp_size());
  110. }
  111. return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
  112. }
  113. #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
  114. static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
  115. {
  116. /*
  117. * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
  118. * anonymous RAM. In any other case, reading unplugged *can* populate a
  119. * fresh page, consuming actual memory.
  120. */
  121. return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
  122. qemu_ram_pagesize(rb) == qemu_real_host_page_size();
  123. }
  124. #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
/*
 * Size the usable region bigger than the requested size if possible. Esp.
 * Linux guests will only add (aligned) memory blocks in case they fully
 * fit into the usable region, but plug+online only a subset of the pages.
 * The memory block size corresponds mostly to the section size.
 *
 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
 * a section size of 512MB on arm64 (as long as the start address is properly
 * aligned, similar to ordinary DIMMs).
 *
 * We can change this at any time and maybe even make it configurable if
 * necessary (as the section size can change). But it's more likely that the
 * section size will rather get smaller and not bigger over time.
 *
 * The extent is added on top of the requested size when the usable region
 * is resized. Targets without a definition fail the build on purpose.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#elif defined(TARGET_ARM)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif
  146. static bool virtio_mem_is_busy(void)
  147. {
  148. /*
  149. * Postcopy cannot handle concurrent discards and we don't want to migrate
  150. * pages on-demand with stale content when plugging new blocks.
  151. *
  152. * For precopy, we don't want unplugged blocks in our migration stream, and
  153. * when plugging new blocks, the page content might differ between source
  154. * and destination (observable by the guest when not initializing pages
  155. * after plugging them) until we're running on the destination (as we didn't
  156. * migrate these blocks when they were unplugged).
  157. */
  158. return migration_in_incoming_postcopy() || !migration_is_idle();
  159. }
  160. typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
  161. uint64_t offset, uint64_t size);
  162. static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
  163. virtio_mem_range_cb cb)
  164. {
  165. unsigned long first_zero_bit, last_zero_bit;
  166. uint64_t offset, size;
  167. int ret = 0;
  168. first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
  169. while (first_zero_bit < vmem->bitmap_size) {
  170. offset = first_zero_bit * vmem->block_size;
  171. last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
  172. first_zero_bit + 1) - 1;
  173. size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
  174. ret = cb(vmem, arg, offset, size);
  175. if (ret) {
  176. break;
  177. }
  178. first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
  179. last_zero_bit + 2);
  180. }
  181. return ret;
  182. }
  183. /*
  184. * Adjust the memory section to cover the intersection with the given range.
  185. *
  186. * Returns false if the intersection is empty, otherwise returns true.
  187. */
  188. static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
  189. uint64_t offset, uint64_t size)
  190. {
  191. uint64_t start = MAX(s->offset_within_region, offset);
  192. uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
  193. offset + size);
  194. if (end <= start) {
  195. return false;
  196. }
  197. s->offset_within_address_space += start - s->offset_within_region;
  198. s->offset_within_region = start;
  199. s->size = int128_make64(end - start);
  200. return true;
  201. }
  202. typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
  203. static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
  204. MemoryRegionSection *s,
  205. void *arg,
  206. virtio_mem_section_cb cb)
  207. {
  208. unsigned long first_bit, last_bit;
  209. uint64_t offset, size;
  210. int ret = 0;
  211. first_bit = s->offset_within_region / vmem->bitmap_size;
  212. first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
  213. while (first_bit < vmem->bitmap_size) {
  214. MemoryRegionSection tmp = *s;
  215. offset = first_bit * vmem->block_size;
  216. last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
  217. first_bit + 1) - 1;
  218. size = (last_bit - first_bit + 1) * vmem->block_size;
  219. if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
  220. break;
  221. }
  222. ret = cb(&tmp, arg);
  223. if (ret) {
  224. break;
  225. }
  226. first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
  227. last_bit + 2);
  228. }
  229. return ret;
  230. }
  231. static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
  232. MemoryRegionSection *s,
  233. void *arg,
  234. virtio_mem_section_cb cb)
  235. {
  236. unsigned long first_bit, last_bit;
  237. uint64_t offset, size;
  238. int ret = 0;
  239. first_bit = s->offset_within_region / vmem->bitmap_size;
  240. first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
  241. while (first_bit < vmem->bitmap_size) {
  242. MemoryRegionSection tmp = *s;
  243. offset = first_bit * vmem->block_size;
  244. last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
  245. first_bit + 1) - 1;
  246. size = (last_bit - first_bit + 1) * vmem->block_size;
  247. if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
  248. break;
  249. }
  250. ret = cb(&tmp, arg);
  251. if (ret) {
  252. break;
  253. }
  254. first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
  255. last_bit + 2);
  256. }
  257. return ret;
  258. }
  259. static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
  260. {
  261. RamDiscardListener *rdl = arg;
  262. return rdl->notify_populate(rdl, s);
  263. }
  264. static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
  265. {
  266. RamDiscardListener *rdl = arg;
  267. rdl->notify_discard(rdl, s);
  268. return 0;
  269. }
  270. static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
  271. uint64_t size)
  272. {
  273. RamDiscardListener *rdl;
  274. QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
  275. MemoryRegionSection tmp = *rdl->section;
  276. if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
  277. continue;
  278. }
  279. rdl->notify_discard(rdl, &tmp);
  280. }
  281. }
  282. static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
  283. uint64_t size)
  284. {
  285. RamDiscardListener *rdl, *rdl2;
  286. int ret = 0;
  287. QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
  288. MemoryRegionSection tmp = *rdl->section;
  289. if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
  290. continue;
  291. }
  292. ret = rdl->notify_populate(rdl, &tmp);
  293. if (ret) {
  294. break;
  295. }
  296. }
  297. if (ret) {
  298. /* Notify all already-notified listeners. */
  299. QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
  300. MemoryRegionSection tmp = *rdl->section;
  301. if (rdl2 == rdl) {
  302. break;
  303. }
  304. if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
  305. continue;
  306. }
  307. rdl2->notify_discard(rdl2, &tmp);
  308. }
  309. }
  310. return ret;
  311. }
  312. static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
  313. {
  314. RamDiscardListener *rdl;
  315. if (!vmem->size) {
  316. return;
  317. }
  318. QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
  319. if (rdl->double_discard_supported) {
  320. rdl->notify_discard(rdl, rdl->section);
  321. } else {
  322. virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
  323. virtio_mem_notify_discard_cb);
  324. }
  325. }
  326. }
  327. static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
  328. uint64_t size, bool plugged)
  329. {
  330. const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
  331. const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
  332. unsigned long found_bit;
  333. /* We fake a shorter bitmap to avoid searching too far. */
  334. if (plugged) {
  335. found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
  336. } else {
  337. found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
  338. }
  339. return found_bit > last_bit;
  340. }
  341. static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
  342. uint64_t size, bool plugged)
  343. {
  344. const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
  345. const unsigned long nbits = size / vmem->block_size;
  346. if (plugged) {
  347. bitmap_set(vmem->bitmap, bit, nbits);
  348. } else {
  349. bitmap_clear(vmem->bitmap, bit, nbits);
  350. }
  351. }
  352. static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
  353. struct virtio_mem_resp *resp)
  354. {
  355. VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
  356. VirtQueue *vq = vmem->vq;
  357. trace_virtio_mem_send_response(le16_to_cpu(resp->type));
  358. iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
  359. virtqueue_push(vq, elem, sizeof(*resp));
  360. virtio_notify(vdev, vq);
  361. }
  362. static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
  363. VirtQueueElement *elem,
  364. uint16_t type)
  365. {
  366. struct virtio_mem_resp resp = {
  367. .type = cpu_to_le16(type),
  368. };
  369. virtio_mem_send_response(vmem, elem, &resp);
  370. }
  371. static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
  372. uint64_t size)
  373. {
  374. if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
  375. return false;
  376. }
  377. if (gpa + size < gpa || !size) {
  378. return false;
  379. }
  380. if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
  381. return false;
  382. }
  383. if (gpa + size > vmem->addr + vmem->usable_region_size) {
  384. return false;
  385. }
  386. return true;
  387. }
  388. static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
  389. uint64_t size, bool plug)
  390. {
  391. const uint64_t offset = start_gpa - vmem->addr;
  392. RAMBlock *rb = vmem->memdev->mr.ram_block;
  393. if (virtio_mem_is_busy()) {
  394. return -EBUSY;
  395. }
  396. if (!plug) {
  397. if (ram_block_discard_range(rb, offset, size)) {
  398. return -EBUSY;
  399. }
  400. virtio_mem_notify_unplug(vmem, offset, size);
  401. } else {
  402. int ret = 0;
  403. if (vmem->prealloc) {
  404. void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
  405. int fd = memory_region_get_fd(&vmem->memdev->mr);
  406. Error *local_err = NULL;
  407. os_mem_prealloc(fd, area, size, 1, &local_err);
  408. if (local_err) {
  409. static bool warned;
  410. /*
  411. * Warn only once, we don't want to fill the log with these
  412. * warnings.
  413. */
  414. if (!warned) {
  415. warn_report_err(local_err);
  416. warned = true;
  417. } else {
  418. error_free(local_err);
  419. }
  420. ret = -EBUSY;
  421. }
  422. }
  423. if (!ret) {
  424. ret = virtio_mem_notify_plug(vmem, offset, size);
  425. }
  426. if (ret) {
  427. /* Could be preallocation or a notifier populated memory. */
  428. ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
  429. return -EBUSY;
  430. }
  431. }
  432. virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
  433. return 0;
  434. }
  435. static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
  436. uint16_t nb_blocks, bool plug)
  437. {
  438. const uint64_t size = nb_blocks * vmem->block_size;
  439. int ret;
  440. if (!virtio_mem_valid_range(vmem, gpa, size)) {
  441. return VIRTIO_MEM_RESP_ERROR;
  442. }
  443. if (plug && (vmem->size + size > vmem->requested_size)) {
  444. return VIRTIO_MEM_RESP_NACK;
  445. }
  446. /* test if really all blocks are in the opposite state */
  447. if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
  448. return VIRTIO_MEM_RESP_ERROR;
  449. }
  450. ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
  451. if (ret) {
  452. return VIRTIO_MEM_RESP_BUSY;
  453. }
  454. if (plug) {
  455. vmem->size += size;
  456. } else {
  457. vmem->size -= size;
  458. }
  459. notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
  460. return VIRTIO_MEM_RESP_ACK;
  461. }
  462. static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
  463. struct virtio_mem_req *req)
  464. {
  465. const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
  466. const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
  467. uint16_t type;
  468. trace_virtio_mem_plug_request(gpa, nb_blocks);
  469. type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
  470. virtio_mem_send_response_simple(vmem, elem, type);
  471. }
  472. static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
  473. struct virtio_mem_req *req)
  474. {
  475. const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
  476. const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
  477. uint16_t type;
  478. trace_virtio_mem_unplug_request(gpa, nb_blocks);
  479. type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
  480. virtio_mem_send_response_simple(vmem, elem, type);
  481. }
  482. static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
  483. uint64_t requested_size,
  484. bool can_shrink)
  485. {
  486. uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
  487. requested_size + VIRTIO_MEM_USABLE_EXTENT);
  488. /* The usable region size always has to be multiples of the block size. */
  489. newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
  490. if (!requested_size) {
  491. newsize = 0;
  492. }
  493. if (newsize < vmem->usable_region_size && !can_shrink) {
  494. return;
  495. }
  496. trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
  497. vmem->usable_region_size = newsize;
  498. }
  499. static int virtio_mem_unplug_all(VirtIOMEM *vmem)
  500. {
  501. RAMBlock *rb = vmem->memdev->mr.ram_block;
  502. if (virtio_mem_is_busy()) {
  503. return -EBUSY;
  504. }
  505. if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
  506. return -EBUSY;
  507. }
  508. virtio_mem_notify_unplug_all(vmem);
  509. bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
  510. if (vmem->size) {
  511. vmem->size = 0;
  512. notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
  513. }
  514. trace_virtio_mem_unplugged_all();
  515. virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
  516. return 0;
  517. }
  518. static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
  519. VirtQueueElement *elem)
  520. {
  521. trace_virtio_mem_unplug_all_request();
  522. if (virtio_mem_unplug_all(vmem)) {
  523. virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
  524. } else {
  525. virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
  526. }
  527. }
  528. static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
  529. struct virtio_mem_req *req)
  530. {
  531. const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
  532. const uint64_t gpa = le64_to_cpu(req->u.state.addr);
  533. const uint64_t size = nb_blocks * vmem->block_size;
  534. struct virtio_mem_resp resp = {
  535. .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
  536. };
  537. trace_virtio_mem_state_request(gpa, nb_blocks);
  538. if (!virtio_mem_valid_range(vmem, gpa, size)) {
  539. virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
  540. return;
  541. }
  542. if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
  543. resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
  544. } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
  545. resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
  546. } else {
  547. resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
  548. }
  549. trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
  550. virtio_mem_send_response(vmem, elem, &resp);
  551. }
  552. static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
  553. {
  554. const int len = sizeof(struct virtio_mem_req);
  555. VirtIOMEM *vmem = VIRTIO_MEM(vdev);
  556. VirtQueueElement *elem;
  557. struct virtio_mem_req req;
  558. uint16_t type;
  559. while (true) {
  560. elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
  561. if (!elem) {
  562. return;
  563. }
  564. if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
  565. virtio_error(vdev, "virtio-mem protocol violation: invalid request"
  566. " size: %d", len);
  567. virtqueue_detach_element(vq, elem, 0);
  568. g_free(elem);
  569. return;
  570. }
  571. if (iov_size(elem->in_sg, elem->in_num) <
  572. sizeof(struct virtio_mem_resp)) {
  573. virtio_error(vdev, "virtio-mem protocol violation: not enough space"
  574. " for response: %zu",
  575. iov_size(elem->in_sg, elem->in_num));
  576. virtqueue_detach_element(vq, elem, 0);
  577. g_free(elem);
  578. return;
  579. }
  580. type = le16_to_cpu(req.type);
  581. switch (type) {
  582. case VIRTIO_MEM_REQ_PLUG:
  583. virtio_mem_plug_request(vmem, elem, &req);
  584. break;
  585. case VIRTIO_MEM_REQ_UNPLUG:
  586. virtio_mem_unplug_request(vmem, elem, &req);
  587. break;
  588. case VIRTIO_MEM_REQ_UNPLUG_ALL:
  589. virtio_mem_unplug_all_request(vmem, elem);
  590. break;
  591. case VIRTIO_MEM_REQ_STATE:
  592. virtio_mem_state_request(vmem, elem, &req);
  593. break;
  594. default:
  595. virtio_error(vdev, "virtio-mem protocol violation: unknown request"
  596. " type: %d", type);
  597. virtqueue_detach_element(vq, elem, 0);
  598. g_free(elem);
  599. return;
  600. }
  601. g_free(elem);
  602. }
  603. }
  604. static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
  605. {
  606. VirtIOMEM *vmem = VIRTIO_MEM(vdev);
  607. struct virtio_mem_config *config = (void *) config_data;
  608. config->block_size = cpu_to_le64(vmem->block_size);
  609. config->node_id = cpu_to_le16(vmem->node);
  610. config->requested_size = cpu_to_le64(vmem->requested_size);
  611. config->plugged_size = cpu_to_le64(vmem->size);
  612. config->addr = cpu_to_le64(vmem->addr);
  613. config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
  614. config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
  615. }
  616. static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
  617. Error **errp)
  618. {
  619. MachineState *ms = MACHINE(qdev_get_machine());
  620. VirtIOMEM *vmem = VIRTIO_MEM(vdev);
  621. if (ms->numa_state) {
  622. #if defined(CONFIG_ACPI)
  623. virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
  624. #endif
  625. }
  626. assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
  627. if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
  628. virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
  629. }
  630. return features;
  631. }
  632. static int virtio_mem_validate_features(VirtIODevice *vdev)
  633. {
  634. if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
  635. !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
  636. return -EFAULT;
  637. }
  638. return 0;
  639. }
  640. static void virtio_mem_system_reset(void *opaque)
  641. {
  642. VirtIOMEM *vmem = VIRTIO_MEM(opaque);
  643. /*
  644. * During usual resets, we will unplug all memory and shrink the usable
  645. * region size. This is, however, not possible in all scenarios. Then,
  646. * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
  647. */
  648. virtio_mem_unplug_all(vmem);
  649. }
  650. static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
  651. {
  652. MachineState *ms = MACHINE(qdev_get_machine());
  653. int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
  654. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  655. VirtIOMEM *vmem = VIRTIO_MEM(dev);
  656. uint64_t page_size;
  657. RAMBlock *rb;
  658. int ret;
  659. if (!vmem->memdev) {
  660. error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
  661. return;
  662. } else if (host_memory_backend_is_mapped(vmem->memdev)) {
  663. error_setg(errp, "'%s' property specifies a busy memdev: %s",
  664. VIRTIO_MEM_MEMDEV_PROP,
  665. object_get_canonical_path_component(OBJECT(vmem->memdev)));
  666. return;
  667. } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
  668. memory_region_is_rom(&vmem->memdev->mr) ||
  669. !vmem->memdev->mr.ram_block) {
  670. error_setg(errp, "'%s' property specifies an unsupported memdev",
  671. VIRTIO_MEM_MEMDEV_PROP);
  672. return;
  673. }
  674. if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
  675. (!nb_numa_nodes && vmem->node)) {
  676. error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
  677. "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
  678. vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
  679. return;
  680. }
  681. if (enable_mlock) {
  682. error_setg(errp, "Incompatible with mlock");
  683. return;
  684. }
  685. rb = vmem->memdev->mr.ram_block;
  686. page_size = qemu_ram_pagesize(rb);
  687. #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
  688. switch (vmem->unplugged_inaccessible) {
  689. case ON_OFF_AUTO_AUTO:
  690. if (virtio_mem_has_shared_zeropage(rb)) {
  691. vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
  692. } else {
  693. vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
  694. }
  695. break;
  696. case ON_OFF_AUTO_OFF:
  697. if (!virtio_mem_has_shared_zeropage(rb)) {
  698. warn_report("'%s' property set to 'off' with a memdev that does"
  699. " not support the shared zeropage.",
  700. VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
  701. }
  702. break;
  703. default:
  704. break;
  705. }
  706. #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
  707. vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
  708. #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
  709. /*
  710. * If the block size wasn't configured by the user, use a sane default. This
  711. * allows using hugetlbfs backends of any page size without manual
  712. * intervention.
  713. */
  714. if (!vmem->block_size) {
  715. vmem->block_size = virtio_mem_default_block_size(rb);
  716. }
  717. if (vmem->block_size < page_size) {
  718. error_setg(errp, "'%s' property has to be at least the page size (0x%"
  719. PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
  720. return;
  721. } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
  722. warn_report("'%s' property is smaller than the default block size (%"
  723. PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
  724. virtio_mem_default_block_size(rb) / MiB);
  725. }
  726. if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
  727. error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
  728. ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
  729. VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
  730. return;
  731. } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
  732. error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
  733. ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
  734. vmem->block_size);
  735. return;
  736. } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
  737. vmem->block_size)) {
  738. error_setg(errp, "'%s' property memdev size has to be multiples of"
  739. "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
  740. VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
  741. return;
  742. }
  743. if (ram_block_coordinated_discard_require(true)) {
  744. error_setg(errp, "Discarding RAM is disabled");
  745. return;
  746. }
  747. ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
  748. if (ret) {
  749. error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
  750. ram_block_coordinated_discard_require(false);
  751. return;
  752. }
  753. virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
  754. vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
  755. vmem->block_size;
  756. vmem->bitmap = bitmap_new(vmem->bitmap_size);
  757. virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
  758. sizeof(struct virtio_mem_config));
  759. vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
  760. host_memory_backend_set_mapped(vmem->memdev, true);
  761. vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
  762. qemu_register_reset(virtio_mem_system_reset, vmem);
  763. /*
  764. * Set ourselves as RamDiscardManager before the plug handler maps the
  765. * memory region and exposes it via an address space.
  766. */
  767. memory_region_set_ram_discard_manager(&vmem->memdev->mr,
  768. RAM_DISCARD_MANAGER(vmem));
  769. }
  770. static void virtio_mem_device_unrealize(DeviceState *dev)
  771. {
  772. VirtIODevice *vdev = VIRTIO_DEVICE(dev);
  773. VirtIOMEM *vmem = VIRTIO_MEM(dev);
  774. /*
  775. * The unplug handler unmapped the memory region, it cannot be
  776. * found via an address space anymore. Unset ourselves.
  777. */
  778. memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
  779. qemu_unregister_reset(virtio_mem_system_reset, vmem);
  780. vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
  781. host_memory_backend_set_mapped(vmem->memdev, false);
  782. virtio_del_queue(vdev, 0);
  783. virtio_cleanup(vdev);
  784. g_free(vmem->bitmap);
  785. ram_block_coordinated_discard_require(false);
  786. }
  787. static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
  788. uint64_t offset, uint64_t size)
  789. {
  790. RAMBlock *rb = vmem->memdev->mr.ram_block;
  791. return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
  792. }
  793. static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
  794. {
  795. /* Make sure all memory is really discarded after migration. */
  796. return virtio_mem_for_each_unplugged_range(vmem, NULL,
  797. virtio_mem_discard_range_cb);
  798. }
  799. static int virtio_mem_post_load(void *opaque, int version_id)
  800. {
  801. VirtIOMEM *vmem = VIRTIO_MEM(opaque);
  802. RamDiscardListener *rdl;
  803. int ret;
  804. /*
  805. * We started out with all memory discarded and our memory region is mapped
  806. * into an address space. Replay, now that we updated the bitmap.
  807. */
  808. QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
  809. ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
  810. virtio_mem_notify_populate_cb);
  811. if (ret) {
  812. return ret;
  813. }
  814. }
  815. if (migration_in_incoming_postcopy()) {
  816. return 0;
  817. }
  818. return virtio_mem_restore_unplugged(vmem);
  819. }
  820. typedef struct VirtIOMEMMigSanityChecks {
  821. VirtIOMEM *parent;
  822. uint64_t addr;
  823. uint64_t region_size;
  824. uint64_t block_size;
  825. uint32_t node;
  826. } VirtIOMEMMigSanityChecks;
  827. static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
  828. {
  829. VirtIOMEMMigSanityChecks *tmp = opaque;
  830. VirtIOMEM *vmem = tmp->parent;
  831. tmp->addr = vmem->addr;
  832. tmp->region_size = memory_region_size(&vmem->memdev->mr);
  833. tmp->block_size = vmem->block_size;
  834. tmp->node = vmem->node;
  835. return 0;
  836. }
  837. static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
  838. {
  839. VirtIOMEMMigSanityChecks *tmp = opaque;
  840. VirtIOMEM *vmem = tmp->parent;
  841. const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
  842. if (tmp->addr != vmem->addr) {
  843. error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
  844. VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
  845. return -EINVAL;
  846. }
  847. /*
  848. * Note: Preparation for resizeable memory regions. The maximum size
  849. * of the memory region must not change during migration.
  850. */
  851. if (tmp->region_size != new_region_size) {
  852. error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
  853. PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
  854. new_region_size);
  855. return -EINVAL;
  856. }
  857. if (tmp->block_size != vmem->block_size) {
  858. error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
  859. VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
  860. vmem->block_size);
  861. return -EINVAL;
  862. }
  863. if (tmp->node != vmem->node) {
  864. error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
  865. VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
  866. return -EINVAL;
  867. }
  868. return 0;
  869. }
  870. static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
  871. .name = "virtio-mem-device/sanity-checks",
  872. .pre_save = virtio_mem_mig_sanity_checks_pre_save,
  873. .post_load = virtio_mem_mig_sanity_checks_post_load,
  874. .fields = (VMStateField[]) {
  875. VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
  876. VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
  877. VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
  878. VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
  879. VMSTATE_END_OF_LIST(),
  880. },
  881. };
  882. static const VMStateDescription vmstate_virtio_mem_device = {
  883. .name = "virtio-mem-device",
  884. .minimum_version_id = 1,
  885. .version_id = 1,
  886. .priority = MIG_PRI_VIRTIO_MEM,
  887. .post_load = virtio_mem_post_load,
  888. .fields = (VMStateField[]) {
  889. VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
  890. vmstate_virtio_mem_sanity_checks),
  891. VMSTATE_UINT64(usable_region_size, VirtIOMEM),
  892. VMSTATE_UINT64(size, VirtIOMEM),
  893. VMSTATE_UINT64(requested_size, VirtIOMEM),
  894. VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
  895. VMSTATE_END_OF_LIST()
  896. },
  897. };
  898. static const VMStateDescription vmstate_virtio_mem = {
  899. .name = "virtio-mem",
  900. .minimum_version_id = 1,
  901. .version_id = 1,
  902. .fields = (VMStateField[]) {
  903. VMSTATE_VIRTIO_DEVICE,
  904. VMSTATE_END_OF_LIST()
  905. },
  906. };
  907. static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
  908. VirtioMEMDeviceInfo *vi)
  909. {
  910. vi->memaddr = vmem->addr;
  911. vi->node = vmem->node;
  912. vi->requested_size = vmem->requested_size;
  913. vi->size = vmem->size;
  914. vi->max_size = memory_region_size(&vmem->memdev->mr);
  915. vi->block_size = vmem->block_size;
  916. vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
  917. }
  918. static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
  919. {
  920. if (!vmem->memdev) {
  921. error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
  922. return NULL;
  923. }
  924. return &vmem->memdev->mr;
  925. }
  926. static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
  927. Notifier *notifier)
  928. {
  929. notifier_list_add(&vmem->size_change_notifiers, notifier);
  930. }
  931. static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
  932. Notifier *notifier)
  933. {
  934. notifier_remove(notifier);
  935. }
  936. static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
  937. void *opaque, Error **errp)
  938. {
  939. const VirtIOMEM *vmem = VIRTIO_MEM(obj);
  940. uint64_t value = vmem->size;
  941. visit_type_size(v, name, &value, errp);
  942. }
  943. static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
  944. const char *name, void *opaque,
  945. Error **errp)
  946. {
  947. const VirtIOMEM *vmem = VIRTIO_MEM(obj);
  948. uint64_t value = vmem->requested_size;
  949. visit_type_size(v, name, &value, errp);
  950. }
  951. static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
  952. const char *name, void *opaque,
  953. Error **errp)
  954. {
  955. VirtIOMEM *vmem = VIRTIO_MEM(obj);
  956. Error *err = NULL;
  957. uint64_t value;
  958. visit_type_size(v, name, &value, &err);
  959. if (err) {
  960. error_propagate(errp, err);
  961. return;
  962. }
  963. /*
  964. * The block size and memory backend are not fixed until the device was
  965. * realized. realize() will verify these properties then.
  966. */
  967. if (DEVICE(obj)->realized) {
  968. if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
  969. error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
  970. ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
  971. vmem->block_size);
  972. return;
  973. } else if (value > memory_region_size(&vmem->memdev->mr)) {
  974. error_setg(errp, "'%s' cannot exceed the memory backend size"
  975. "(0x%" PRIx64 ")", name,
  976. memory_region_size(&vmem->memdev->mr));
  977. return;
  978. }
  979. if (value != vmem->requested_size) {
  980. virtio_mem_resize_usable_region(vmem, value, false);
  981. vmem->requested_size = value;
  982. }
  983. /*
  984. * Trigger a config update so the guest gets notified. We trigger
  985. * even if the size didn't change (especially helpful for debugging).
  986. */
  987. virtio_notify_config(VIRTIO_DEVICE(vmem));
  988. } else {
  989. vmem->requested_size = value;
  990. }
  991. }
  992. static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
  993. void *opaque, Error **errp)
  994. {
  995. const VirtIOMEM *vmem = VIRTIO_MEM(obj);
  996. uint64_t value = vmem->block_size;
  997. /*
  998. * If not configured by the user (and we're not realized yet), use the
  999. * default block size we would use with the current memory backend.
  1000. */
  1001. if (!value) {
  1002. if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
  1003. value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
  1004. } else {
  1005. value = virtio_mem_thp_size();
  1006. }
  1007. }
  1008. visit_type_size(v, name, &value, errp);
  1009. }
  1010. static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
  1011. void *opaque, Error **errp)
  1012. {
  1013. VirtIOMEM *vmem = VIRTIO_MEM(obj);
  1014. Error *err = NULL;
  1015. uint64_t value;
  1016. if (DEVICE(obj)->realized) {
  1017. error_setg(errp, "'%s' cannot be changed", name);
  1018. return;
  1019. }
  1020. visit_type_size(v, name, &value, &err);
  1021. if (err) {
  1022. error_propagate(errp, err);
  1023. return;
  1024. }
  1025. if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
  1026. error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
  1027. VIRTIO_MEM_MIN_BLOCK_SIZE);
  1028. return;
  1029. } else if (!is_power_of_2(value)) {
  1030. error_setg(errp, "'%s' property has to be a power of two", name);
  1031. return;
  1032. }
  1033. vmem->block_size = value;
  1034. }
  1035. static void virtio_mem_instance_init(Object *obj)
  1036. {
  1037. VirtIOMEM *vmem = VIRTIO_MEM(obj);
  1038. notifier_list_init(&vmem->size_change_notifiers);
  1039. QLIST_INIT(&vmem->rdl_list);
  1040. object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
  1041. NULL, NULL, NULL);
  1042. object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
  1043. virtio_mem_get_requested_size,
  1044. virtio_mem_set_requested_size, NULL, NULL);
  1045. object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
  1046. virtio_mem_get_block_size, virtio_mem_set_block_size,
  1047. NULL, NULL);
  1048. }
  1049. static Property virtio_mem_properties[] = {
  1050. DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
  1051. DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
  1052. DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
  1053. DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
  1054. TYPE_MEMORY_BACKEND, HostMemoryBackend *),
  1055. #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
  1056. DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
  1057. unplugged_inaccessible, ON_OFF_AUTO_AUTO),
  1058. #endif
  1059. DEFINE_PROP_END_OF_LIST(),
  1060. };
  1061. static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
  1062. const MemoryRegion *mr)
  1063. {
  1064. const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
  1065. g_assert(mr == &vmem->memdev->mr);
  1066. return vmem->block_size;
  1067. }
  1068. static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
  1069. const MemoryRegionSection *s)
  1070. {
  1071. const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
  1072. uint64_t start_gpa = vmem->addr + s->offset_within_region;
  1073. uint64_t end_gpa = start_gpa + int128_get64(s->size);
  1074. g_assert(s->mr == &vmem->memdev->mr);
  1075. start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
  1076. end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
  1077. if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
  1078. return false;
  1079. }
  1080. return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
  1081. }
  /*
   * Argument bundle handed to the section iteration helpers when replaying:
   * the user supplied replay function and its opaque argument.
   */
  struct VirtIOMEMReplayData {
      /* ReplayRamPopulate or ReplayRamDiscard, cast back by the callbacks. */
      void *fn;
      /* Passed through unmodified as the second argument of @fn. */
      void *opaque;
  };
  1086. static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
  1087. {
  1088. struct VirtIOMEMReplayData *data = arg;
  1089. return ((ReplayRamPopulate)data->fn)(s, data->opaque);
  1090. }
  1091. static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
  1092. MemoryRegionSection *s,
  1093. ReplayRamPopulate replay_fn,
  1094. void *opaque)
  1095. {
  1096. const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
  1097. struct VirtIOMEMReplayData data = {
  1098. .fn = replay_fn,
  1099. .opaque = opaque,
  1100. };
  1101. g_assert(s->mr == &vmem->memdev->mr);
  1102. return virtio_mem_for_each_plugged_section(vmem, s, &data,
  1103. virtio_mem_rdm_replay_populated_cb);
  1104. }
  1105. static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
  1106. void *arg)
  1107. {
  1108. struct VirtIOMEMReplayData *data = arg;
  1109. ((ReplayRamDiscard)data->fn)(s, data->opaque);
  1110. return 0;
  1111. }
  1112. static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
  1113. MemoryRegionSection *s,
  1114. ReplayRamDiscard replay_fn,
  1115. void *opaque)
  1116. {
  1117. const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
  1118. struct VirtIOMEMReplayData data = {
  1119. .fn = replay_fn,
  1120. .opaque = opaque,
  1121. };
  1122. g_assert(s->mr == &vmem->memdev->mr);
  1123. virtio_mem_for_each_unplugged_section(vmem, s, &data,
  1124. virtio_mem_rdm_replay_discarded_cb);
  1125. }
  1126. static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
  1127. RamDiscardListener *rdl,
  1128. MemoryRegionSection *s)
  1129. {
  1130. VirtIOMEM *vmem = VIRTIO_MEM(rdm);
  1131. int ret;
  1132. g_assert(s->mr == &vmem->memdev->mr);
  1133. rdl->section = memory_region_section_new_copy(s);
  1134. QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
  1135. ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
  1136. virtio_mem_notify_populate_cb);
  1137. if (ret) {
  1138. error_report("%s: Replaying plugged ranges failed: %s", __func__,
  1139. strerror(-ret));
  1140. }
  1141. }
  1142. static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
  1143. RamDiscardListener *rdl)
  1144. {
  1145. VirtIOMEM *vmem = VIRTIO_MEM(rdm);
  1146. g_assert(rdl->section->mr == &vmem->memdev->mr);
  1147. if (vmem->size) {
  1148. if (rdl->double_discard_supported) {
  1149. rdl->notify_discard(rdl, rdl->section);
  1150. } else {
  1151. virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
  1152. virtio_mem_notify_discard_cb);
  1153. }
  1154. }
  1155. memory_region_section_free_copy(rdl->section);
  1156. rdl->section = NULL;
  1157. QLIST_REMOVE(rdl, next);
  1158. }
  1159. static void virtio_mem_class_init(ObjectClass *klass, void *data)
  1160. {
  1161. DeviceClass *dc = DEVICE_CLASS(klass);
  1162. VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
  1163. VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
  1164. RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
  1165. device_class_set_props(dc, virtio_mem_properties);
  1166. dc->vmsd = &vmstate_virtio_mem;
  1167. set_bit(DEVICE_CATEGORY_MISC, dc->categories);
  1168. vdc->realize = virtio_mem_device_realize;
  1169. vdc->unrealize = virtio_mem_device_unrealize;
  1170. vdc->get_config = virtio_mem_get_config;
  1171. vdc->get_features = virtio_mem_get_features;
  1172. vdc->validate_features = virtio_mem_validate_features;
  1173. vdc->vmsd = &vmstate_virtio_mem_device;
  1174. vmc->fill_device_info = virtio_mem_fill_device_info;
  1175. vmc->get_memory_region = virtio_mem_get_memory_region;
  1176. vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
  1177. vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
  1178. rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
  1179. rdmc->is_populated = virtio_mem_rdm_is_populated;
  1180. rdmc->replay_populated = virtio_mem_rdm_replay_populated;
  1181. rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
  1182. rdmc->register_listener = virtio_mem_rdm_register_listener;
  1183. rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
  1184. }
  1185. static const TypeInfo virtio_mem_info = {
  1186. .name = TYPE_VIRTIO_MEM,
  1187. .parent = TYPE_VIRTIO_DEVICE,
  1188. .instance_size = sizeof(VirtIOMEM),
  1189. .instance_init = virtio_mem_instance_init,
  1190. .class_init = virtio_mem_class_init,
  1191. .class_size = sizeof(VirtIOMEMClass),
  1192. .interfaces = (InterfaceInfo[]) {
  1193. { TYPE_RAM_DISCARD_MANAGER },
  1194. { }
  1195. },
  1196. };
  1197. static void virtio_register_types(void)
  1198. {
  1199. type_register_static(&virtio_mem_info);
  1200. }
  1201. type_init(virtio_register_types)