hostmem.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /*
  2. * QEMU Host Memory Backend
  3. *
  4. * Copyright (C) 2013-2014 Red Hat Inc
  5. *
  6. * Authors:
  7. * Igor Mammedov <imammedo@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "sysemu/hostmem.h"
  14. #include "sysemu/sysemu.h"
  15. #include "hw/boards.h"
  16. #include "qapi/error.h"
  17. #include "qapi/qapi-builtin-visit.h"
  18. #include "qapi/visitor.h"
  19. #include "qemu/config-file.h"
  20. #include "qom/object_interfaces.h"
  21. #include "qemu/mmap-alloc.h"
  22. #ifdef CONFIG_NUMA
  23. #include <numaif.h>
  24. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  25. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  26. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  27. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  28. #endif
  29. char *
  30. host_memory_backend_get_name(HostMemoryBackend *backend)
  31. {
  32. if (!backend->use_canonical_path) {
  33. return object_get_canonical_path_component(OBJECT(backend));
  34. }
  35. return object_get_canonical_path(OBJECT(backend));
  36. }
  37. static void
  38. host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  39. void *opaque, Error **errp)
  40. {
  41. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  42. uint64_t value = backend->size;
  43. visit_type_size(v, name, &value, errp);
  44. }
  45. static void
  46. host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  47. void *opaque, Error **errp)
  48. {
  49. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  50. Error *local_err = NULL;
  51. uint64_t value;
  52. if (host_memory_backend_mr_inited(backend)) {
  53. error_setg(&local_err, "cannot change property %s of %s ",
  54. name, object_get_typename(obj));
  55. goto out;
  56. }
  57. visit_type_size(v, name, &value, &local_err);
  58. if (local_err) {
  59. goto out;
  60. }
  61. if (!value) {
  62. error_setg(&local_err,
  63. "property '%s' of %s doesn't take value '%" PRIu64 "'",
  64. name, object_get_typename(obj), value);
  65. goto out;
  66. }
  67. backend->size = value;
  68. out:
  69. error_propagate(errp, local_err);
  70. }
  71. static void
  72. host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  73. void *opaque, Error **errp)
  74. {
  75. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  76. uint16List *host_nodes = NULL;
  77. uint16List **node = &host_nodes;
  78. unsigned long value;
  79. value = find_first_bit(backend->host_nodes, MAX_NODES);
  80. if (value == MAX_NODES) {
  81. goto ret;
  82. }
  83. *node = g_malloc0(sizeof(**node));
  84. (*node)->value = value;
  85. node = &(*node)->next;
  86. do {
  87. value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  88. if (value == MAX_NODES) {
  89. break;
  90. }
  91. *node = g_malloc0(sizeof(**node));
  92. (*node)->value = value;
  93. node = &(*node)->next;
  94. } while (true);
  95. ret:
  96. visit_type_uint16List(v, name, &host_nodes, errp);
  97. }
  98. static void
  99. host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
  100. void *opaque, Error **errp)
  101. {
  102. #ifdef CONFIG_NUMA
  103. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  104. uint16List *l, *host_nodes = NULL;
  105. visit_type_uint16List(v, name, &host_nodes, errp);
  106. for (l = host_nodes; l; l = l->next) {
  107. if (l->value >= MAX_NODES) {
  108. error_setg(errp, "Invalid host-nodes value: %d", l->value);
  109. goto out;
  110. }
  111. }
  112. for (l = host_nodes; l; l = l->next) {
  113. bitmap_set(backend->host_nodes, l->value, 1);
  114. }
  115. out:
  116. qapi_free_uint16List(host_nodes);
  117. #else
  118. error_setg(errp, "NUMA node binding are not supported by this QEMU");
  119. #endif
  120. }
  121. static int
  122. host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
  123. {
  124. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  125. return backend->policy;
  126. }
  127. static void
  128. host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
  129. {
  130. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  131. backend->policy = policy;
  132. #ifndef CONFIG_NUMA
  133. if (policy != HOST_MEM_POLICY_DEFAULT) {
  134. error_setg(errp, "NUMA policies are not supported by this QEMU");
  135. }
  136. #endif
  137. }
  138. static bool host_memory_backend_get_merge(Object *obj, Error **errp)
  139. {
  140. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  141. return backend->merge;
  142. }
  143. static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
  144. {
  145. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  146. if (!host_memory_backend_mr_inited(backend)) {
  147. backend->merge = value;
  148. return;
  149. }
  150. if (value != backend->merge) {
  151. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  152. uint64_t sz = memory_region_size(&backend->mr);
  153. qemu_madvise(ptr, sz,
  154. value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
  155. backend->merge = value;
  156. }
  157. }
  158. static bool host_memory_backend_get_dump(Object *obj, Error **errp)
  159. {
  160. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  161. return backend->dump;
  162. }
  163. static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
  164. {
  165. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  166. if (!host_memory_backend_mr_inited(backend)) {
  167. backend->dump = value;
  168. return;
  169. }
  170. if (value != backend->dump) {
  171. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  172. uint64_t sz = memory_region_size(&backend->mr);
  173. qemu_madvise(ptr, sz,
  174. value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
  175. backend->dump = value;
  176. }
  177. }
  178. static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
  179. {
  180. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  181. return backend->prealloc || backend->force_prealloc;
  182. }
  183. static void host_memory_backend_set_prealloc(Object *obj, bool value,
  184. Error **errp)
  185. {
  186. Error *local_err = NULL;
  187. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  188. MachineState *ms = MACHINE(qdev_get_machine());
  189. if (backend->force_prealloc) {
  190. if (value) {
  191. error_setg(errp,
  192. "remove -mem-prealloc to use the prealloc property");
  193. return;
  194. }
  195. }
  196. if (!host_memory_backend_mr_inited(backend)) {
  197. backend->prealloc = value;
  198. return;
  199. }
  200. if (value && !backend->prealloc) {
  201. int fd = memory_region_get_fd(&backend->mr);
  202. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  203. uint64_t sz = memory_region_size(&backend->mr);
  204. os_mem_prealloc(fd, ptr, sz, ms->smp.cpus, &local_err);
  205. if (local_err) {
  206. error_propagate(errp, local_err);
  207. return;
  208. }
  209. backend->prealloc = true;
  210. }
  211. }
  212. static void host_memory_backend_init(Object *obj)
  213. {
  214. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  215. MachineState *machine = MACHINE(qdev_get_machine());
  216. backend->merge = machine_mem_merge(machine);
  217. backend->dump = machine_dump_guest_core(machine);
  218. backend->prealloc = mem_prealloc;
  219. }
  220. static void host_memory_backend_post_init(Object *obj)
  221. {
  222. object_apply_compat_props(obj);
  223. }
  224. bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
  225. {
  226. /*
  227. * NOTE: We forbid zero-length memory backend, so here zero means
  228. * "we haven't inited the backend memory region yet".
  229. */
  230. return memory_region_size(&backend->mr) != 0;
  231. }
  232. MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
  233. {
  234. return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
  235. }
  236. void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
  237. {
  238. backend->is_mapped = mapped;
  239. }
  240. bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
  241. {
  242. return backend->is_mapped;
  243. }
  244. #ifdef __linux__
  245. size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
  246. {
  247. Object *obj = OBJECT(memdev);
  248. char *path = object_property_get_str(obj, "mem-path", NULL);
  249. size_t pagesize = qemu_mempath_getpagesize(path);
  250. g_free(path);
  251. return pagesize;
  252. }
  253. #else
  254. size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
  255. {
  256. return qemu_real_host_page_size;
  257. }
  258. #endif
  259. static void
  260. host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
  261. {
  262. HostMemoryBackend *backend = MEMORY_BACKEND(uc);
  263. HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
  264. MachineState *ms = MACHINE(qdev_get_machine());
  265. Error *local_err = NULL;
  266. void *ptr;
  267. uint64_t sz;
  268. if (bc->alloc) {
  269. bc->alloc(backend, &local_err);
  270. if (local_err) {
  271. goto out;
  272. }
  273. ptr = memory_region_get_ram_ptr(&backend->mr);
  274. sz = memory_region_size(&backend->mr);
  275. if (backend->merge) {
  276. qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
  277. }
  278. if (!backend->dump) {
  279. qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
  280. }
  281. #ifdef CONFIG_NUMA
  282. unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
  283. /* lastbit == MAX_NODES means maxnode = 0 */
  284. unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
  285. /* ensure policy won't be ignored in case memory is preallocated
  286. * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
  287. * this doesn't catch hugepage case. */
  288. unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
  289. /* check for invalid host-nodes and policies and give more verbose
  290. * error messages than mbind(). */
  291. if (maxnode && backend->policy == MPOL_DEFAULT) {
  292. error_setg(errp, "host-nodes must be empty for policy default,"
  293. " or you should explicitly specify a policy other"
  294. " than default");
  295. return;
  296. } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
  297. error_setg(errp, "host-nodes must be set for policy %s",
  298. HostMemPolicy_str(backend->policy));
  299. return;
  300. }
  301. /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
  302. * as argument to mbind() due to an old Linux bug (feature?) which
  303. * cuts off the last specified node. This means backend->host_nodes
  304. * must have MAX_NODES+1 bits available.
  305. */
  306. assert(sizeof(backend->host_nodes) >=
  307. BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
  308. assert(maxnode <= MAX_NODES);
  309. if (mbind(ptr, sz, backend->policy,
  310. maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
  311. if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
  312. error_setg_errno(errp, errno,
  313. "cannot bind memory to host NUMA nodes");
  314. return;
  315. }
  316. }
  317. #endif
  318. /* Preallocate memory after the NUMA policy has been instantiated.
  319. * This is necessary to guarantee memory is allocated with
  320. * specified NUMA policy in place.
  321. */
  322. if (backend->prealloc) {
  323. os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
  324. ms->smp.cpus, &local_err);
  325. if (local_err) {
  326. goto out;
  327. }
  328. }
  329. }
  330. out:
  331. error_propagate(errp, local_err);
  332. }
  333. static bool
  334. host_memory_backend_can_be_deleted(UserCreatable *uc)
  335. {
  336. if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
  337. return false;
  338. } else {
  339. return true;
  340. }
  341. }
  342. static bool host_memory_backend_get_share(Object *o, Error **errp)
  343. {
  344. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  345. return backend->share;
  346. }
  347. static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
  348. {
  349. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  350. if (host_memory_backend_mr_inited(backend)) {
  351. error_setg(errp, "cannot change property value");
  352. return;
  353. }
  354. backend->share = value;
  355. }
  356. static bool
  357. host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
  358. {
  359. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  360. return backend->use_canonical_path;
  361. }
  362. static void
  363. host_memory_backend_set_use_canonical_path(Object *obj, bool value,
  364. Error **errp)
  365. {
  366. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  367. backend->use_canonical_path = value;
  368. }
  369. static void
  370. host_memory_backend_class_init(ObjectClass *oc, void *data)
  371. {
  372. UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
  373. ucc->complete = host_memory_backend_memory_complete;
  374. ucc->can_be_deleted = host_memory_backend_can_be_deleted;
  375. object_class_property_add_bool(oc, "merge",
  376. host_memory_backend_get_merge,
  377. host_memory_backend_set_merge, &error_abort);
  378. object_class_property_set_description(oc, "merge",
  379. "Mark memory as mergeable", &error_abort);
  380. object_class_property_add_bool(oc, "dump",
  381. host_memory_backend_get_dump,
  382. host_memory_backend_set_dump, &error_abort);
  383. object_class_property_set_description(oc, "dump",
  384. "Set to 'off' to exclude from core dump", &error_abort);
  385. object_class_property_add_bool(oc, "prealloc",
  386. host_memory_backend_get_prealloc,
  387. host_memory_backend_set_prealloc, &error_abort);
  388. object_class_property_set_description(oc, "prealloc",
  389. "Preallocate memory", &error_abort);
  390. object_class_property_add(oc, "size", "int",
  391. host_memory_backend_get_size,
  392. host_memory_backend_set_size,
  393. NULL, NULL, &error_abort);
  394. object_class_property_set_description(oc, "size",
  395. "Size of the memory region (ex: 500M)", &error_abort);
  396. object_class_property_add(oc, "host-nodes", "int",
  397. host_memory_backend_get_host_nodes,
  398. host_memory_backend_set_host_nodes,
  399. NULL, NULL, &error_abort);
  400. object_class_property_set_description(oc, "host-nodes",
  401. "Binds memory to the list of NUMA host nodes", &error_abort);
  402. object_class_property_add_enum(oc, "policy", "HostMemPolicy",
  403. &HostMemPolicy_lookup,
  404. host_memory_backend_get_policy,
  405. host_memory_backend_set_policy, &error_abort);
  406. object_class_property_set_description(oc, "policy",
  407. "Set the NUMA policy", &error_abort);
  408. object_class_property_add_bool(oc, "share",
  409. host_memory_backend_get_share, host_memory_backend_set_share,
  410. &error_abort);
  411. object_class_property_set_description(oc, "share",
  412. "Mark the memory as private to QEMU or shared", &error_abort);
  413. object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
  414. host_memory_backend_get_use_canonical_path,
  415. host_memory_backend_set_use_canonical_path, &error_abort);
  416. }
  417. static const TypeInfo host_memory_backend_info = {
  418. .name = TYPE_MEMORY_BACKEND,
  419. .parent = TYPE_OBJECT,
  420. .abstract = true,
  421. .class_size = sizeof(HostMemoryBackendClass),
  422. .class_init = host_memory_backend_class_init,
  423. .instance_size = sizeof(HostMemoryBackend),
  424. .instance_init = host_memory_backend_init,
  425. .instance_post_init = host_memory_backend_post_init,
  426. .interfaces = (InterfaceInfo[]) {
  427. { TYPE_USER_CREATABLE },
  428. { }
  429. }
  430. };
  431. static void register_types(void)
  432. {
  433. type_register_static(&host_memory_backend_info);
  434. }
  435. type_init(register_types);