hostmem.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. /*
  2. * QEMU Host Memory Backend
  3. *
  4. * Copyright (C) 2013-2014 Red Hat Inc
  5. *
  6. * Authors:
  7. * Igor Mammedov <imammedo@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "sysemu/hostmem.h"
  14. #include "sysemu/sysemu.h"
  15. #include "hw/boards.h"
  16. #include "qapi/error.h"
  17. #include "qapi/qapi-builtin-visit.h"
  18. #include "qapi/visitor.h"
  19. #include "qemu/config-file.h"
  20. #include "qom/object_interfaces.h"
  21. #include "qemu/mmap-alloc.h"
  22. #ifdef CONFIG_NUMA
  23. #include <numaif.h>
  24. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  25. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  26. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  27. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  28. #endif
  29. char *
  30. host_memory_backend_get_name(HostMemoryBackend *backend)
  31. {
  32. if (!backend->use_canonical_path) {
  33. return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
  34. }
  35. return object_get_canonical_path(OBJECT(backend));
  36. }
  37. static void
  38. host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  39. void *opaque, Error **errp)
  40. {
  41. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  42. uint64_t value = backend->size;
  43. visit_type_size(v, name, &value, errp);
  44. }
  45. static void
  46. host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  47. void *opaque, Error **errp)
  48. {
  49. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  50. uint64_t value;
  51. if (host_memory_backend_mr_inited(backend)) {
  52. error_setg(errp, "cannot change property %s of %s ", name,
  53. object_get_typename(obj));
  54. return;
  55. }
  56. if (!visit_type_size(v, name, &value, errp)) {
  57. return;
  58. }
  59. if (!value) {
  60. error_setg(errp,
  61. "property '%s' of %s doesn't take value '%" PRIu64 "'",
  62. name, object_get_typename(obj), value);
  63. return;
  64. }
  65. backend->size = value;
  66. }
  67. static void
  68. host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  69. void *opaque, Error **errp)
  70. {
  71. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  72. uint16List *host_nodes = NULL;
  73. uint16List **node = &host_nodes;
  74. unsigned long value;
  75. value = find_first_bit(backend->host_nodes, MAX_NODES);
  76. if (value == MAX_NODES) {
  77. goto ret;
  78. }
  79. *node = g_malloc0(sizeof(**node));
  80. (*node)->value = value;
  81. node = &(*node)->next;
  82. do {
  83. value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  84. if (value == MAX_NODES) {
  85. break;
  86. }
  87. *node = g_malloc0(sizeof(**node));
  88. (*node)->value = value;
  89. node = &(*node)->next;
  90. } while (true);
  91. ret:
  92. visit_type_uint16List(v, name, &host_nodes, errp);
  93. }
  94. static void
  95. host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
  96. void *opaque, Error **errp)
  97. {
  98. #ifdef CONFIG_NUMA
  99. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  100. uint16List *l, *host_nodes = NULL;
  101. visit_type_uint16List(v, name, &host_nodes, errp);
  102. for (l = host_nodes; l; l = l->next) {
  103. if (l->value >= MAX_NODES) {
  104. error_setg(errp, "Invalid host-nodes value: %d", l->value);
  105. goto out;
  106. }
  107. }
  108. for (l = host_nodes; l; l = l->next) {
  109. bitmap_set(backend->host_nodes, l->value, 1);
  110. }
  111. out:
  112. qapi_free_uint16List(host_nodes);
  113. #else
  114. error_setg(errp, "NUMA node binding are not supported by this QEMU");
  115. #endif
  116. }
  117. static int
  118. host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
  119. {
  120. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  121. return backend->policy;
  122. }
  123. static void
  124. host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
  125. {
  126. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  127. backend->policy = policy;
  128. #ifndef CONFIG_NUMA
  129. if (policy != HOST_MEM_POLICY_DEFAULT) {
  130. error_setg(errp, "NUMA policies are not supported by this QEMU");
  131. }
  132. #endif
  133. }
  134. static bool host_memory_backend_get_merge(Object *obj, Error **errp)
  135. {
  136. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  137. return backend->merge;
  138. }
  139. static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
  140. {
  141. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  142. if (!host_memory_backend_mr_inited(backend)) {
  143. backend->merge = value;
  144. return;
  145. }
  146. if (value != backend->merge) {
  147. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  148. uint64_t sz = memory_region_size(&backend->mr);
  149. qemu_madvise(ptr, sz,
  150. value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
  151. backend->merge = value;
  152. }
  153. }
  154. static bool host_memory_backend_get_dump(Object *obj, Error **errp)
  155. {
  156. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  157. return backend->dump;
  158. }
  159. static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
  160. {
  161. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  162. if (!host_memory_backend_mr_inited(backend)) {
  163. backend->dump = value;
  164. return;
  165. }
  166. if (value != backend->dump) {
  167. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  168. uint64_t sz = memory_region_size(&backend->mr);
  169. qemu_madvise(ptr, sz,
  170. value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
  171. backend->dump = value;
  172. }
  173. }
  174. static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
  175. {
  176. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  177. return backend->prealloc;
  178. }
  179. static void host_memory_backend_set_prealloc(Object *obj, bool value,
  180. Error **errp)
  181. {
  182. Error *local_err = NULL;
  183. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  184. if (!host_memory_backend_mr_inited(backend)) {
  185. backend->prealloc = value;
  186. return;
  187. }
  188. if (value && !backend->prealloc) {
  189. int fd = memory_region_get_fd(&backend->mr);
  190. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  191. uint64_t sz = memory_region_size(&backend->mr);
  192. os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
  193. if (local_err) {
  194. error_propagate(errp, local_err);
  195. return;
  196. }
  197. backend->prealloc = true;
  198. }
  199. }
  200. static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
  201. const char *name, void *opaque, Error **errp)
  202. {
  203. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  204. visit_type_uint32(v, name, &backend->prealloc_threads, errp);
  205. }
  206. static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
  207. const char *name, void *opaque, Error **errp)
  208. {
  209. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  210. uint32_t value;
  211. if (!visit_type_uint32(v, name, &value, errp)) {
  212. return;
  213. }
  214. if (value <= 0) {
  215. error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
  216. object_get_typename(obj), value);
  217. return;
  218. }
  219. backend->prealloc_threads = value;
  220. }
  221. static void host_memory_backend_init(Object *obj)
  222. {
  223. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  224. MachineState *machine = MACHINE(qdev_get_machine());
  225. /* TODO: convert access to globals to compat properties */
  226. backend->merge = machine_mem_merge(machine);
  227. backend->dump = machine_dump_guest_core(machine);
  228. backend->prealloc_threads = 1;
  229. }
  230. static void host_memory_backend_post_init(Object *obj)
  231. {
  232. object_apply_compat_props(obj);
  233. }
  234. bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
  235. {
  236. /*
  237. * NOTE: We forbid zero-length memory backend, so here zero means
  238. * "we haven't inited the backend memory region yet".
  239. */
  240. return memory_region_size(&backend->mr) != 0;
  241. }
  242. MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
  243. {
  244. return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
  245. }
  246. void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
  247. {
  248. backend->is_mapped = mapped;
  249. }
  250. bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
  251. {
  252. return backend->is_mapped;
  253. }
  254. #ifdef __linux__
  255. size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
  256. {
  257. Object *obj = OBJECT(memdev);
  258. char *path = object_property_get_str(obj, "mem-path", NULL);
  259. size_t pagesize = qemu_mempath_getpagesize(path);
  260. g_free(path);
  261. return pagesize;
  262. }
  263. #else
  264. size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
  265. {
  266. return qemu_real_host_page_size;
  267. }
  268. #endif
/*
 * UserCreatable::complete hook: allocate the backing memory and apply
 * all configured properties, strictly in this order — madvise flags,
 * NUMA binding (mbind), then preallocation.  Preallocation must come
 * last so the pages are faulted in under the final NUMA policy.
 */
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    if (bc->alloc) {
        /* Subclass (ram/file/memfd) creates backend->mr here. */
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);

        if (maxnode &&
            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
                  flags)) {
            /* ENOSYS with the default policy is tolerated: the kernel
             * simply lacks NUMA support and default is what it does
             * anyway.  Anything else is a real failure. */
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /* Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
                            backend->prealloc_threads, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}
  343. static bool
  344. host_memory_backend_can_be_deleted(UserCreatable *uc)
  345. {
  346. if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
  347. return false;
  348. } else {
  349. return true;
  350. }
  351. }
  352. static bool host_memory_backend_get_share(Object *o, Error **errp)
  353. {
  354. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  355. return backend->share;
  356. }
  357. static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
  358. {
  359. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  360. if (host_memory_backend_mr_inited(backend)) {
  361. error_setg(errp, "cannot change property value");
  362. return;
  363. }
  364. backend->share = value;
  365. }
  366. static bool
  367. host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
  368. {
  369. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  370. return backend->use_canonical_path;
  371. }
  372. static void
  373. host_memory_backend_set_use_canonical_path(Object *obj, bool value,
  374. Error **errp)
  375. {
  376. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  377. backend->use_canonical_path = value;
  378. }
/*
 * Class init: wire up the UserCreatable hooks and register every QOM
 * property of the abstract memory-backend type, each paired with its
 * getter/setter defined above.
 */
static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    /* complete() performs allocation, NUMA binding and preallocation. */
    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable");
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump");
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory");
    object_class_property_add(oc, "prealloc-threads", "int",
        host_memory_backend_get_prealloc_threads,
        host_memory_backend_set_prealloc_threads,
        NULL, NULL);
    object_class_property_set_description(oc, "prealloc-threads",
        "Number of CPU threads to use for prealloc");
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)");
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes");
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy");
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared");
    /* x- prefix: experimental/unstable property, no description set. */
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path);
}
/*
 * Type registration data for the abstract TYPE_MEMORY_BACKEND base
 * class.  Concrete backends (-ram, -file, -memfd) derive from it and
 * provide the alloc() class hook.  Implements the user-creatable
 * interface so it can be instantiated via -object / object_add.
 */
static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true, /* only subclasses are instantiable */
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};
  446. static void register_types(void)
  447. {
  448. type_register_static(&host_memory_backend_info);
  449. }
  450. type_init(register_types);