2
0

hostmem.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. /*
  2. * QEMU Host Memory Backend
  3. *
  4. * Copyright (C) 2013-2014 Red Hat Inc
  5. *
  6. * Authors:
  7. * Igor Mammedov <imammedo@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "sysemu/hostmem.h"
  14. #include "hw/boards.h"
  15. #include "qapi/error.h"
  16. #include "qapi/visitor.h"
  17. #include "qapi-types.h"
  18. #include "qapi-visit.h"
  19. #include "qemu/config-file.h"
  20. #include "qom/object_interfaces.h"
  21. #ifdef CONFIG_NUMA
  22. #include <numaif.h>
  23. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
  24. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
  25. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
  26. QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
  27. #endif
  28. static void
  29. host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
  30. void *opaque, Error **errp)
  31. {
  32. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  33. uint64_t value = backend->size;
  34. visit_type_size(v, name, &value, errp);
  35. }
  36. static void
  37. host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
  38. void *opaque, Error **errp)
  39. {
  40. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  41. Error *local_err = NULL;
  42. uint64_t value;
  43. if (memory_region_size(&backend->mr)) {
  44. error_setg(&local_err, "cannot change property value");
  45. goto out;
  46. }
  47. visit_type_size(v, name, &value, &local_err);
  48. if (local_err) {
  49. goto out;
  50. }
  51. if (!value) {
  52. error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
  53. PRIu64 "'", object_get_typename(obj), name, value);
  54. goto out;
  55. }
  56. backend->size = value;
  57. out:
  58. error_propagate(errp, local_err);
  59. }
  60. static void
  61. host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
  62. void *opaque, Error **errp)
  63. {
  64. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  65. uint16List *host_nodes = NULL;
  66. uint16List **node = &host_nodes;
  67. unsigned long value;
  68. value = find_first_bit(backend->host_nodes, MAX_NODES);
  69. if (value == MAX_NODES) {
  70. return;
  71. }
  72. *node = g_malloc0(sizeof(**node));
  73. (*node)->value = value;
  74. node = &(*node)->next;
  75. do {
  76. value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
  77. if (value == MAX_NODES) {
  78. break;
  79. }
  80. *node = g_malloc0(sizeof(**node));
  81. (*node)->value = value;
  82. node = &(*node)->next;
  83. } while (true);
  84. visit_type_uint16List(v, name, &host_nodes, errp);
  85. }
  86. static void
  87. host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
  88. void *opaque, Error **errp)
  89. {
  90. #ifdef CONFIG_NUMA
  91. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  92. uint16List *l = NULL;
  93. visit_type_uint16List(v, name, &l, errp);
  94. while (l) {
  95. bitmap_set(backend->host_nodes, l->value, 1);
  96. l = l->next;
  97. }
  98. #else
  99. error_setg(errp, "NUMA node binding are not supported by this QEMU");
  100. #endif
  101. }
  102. static int
  103. host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
  104. {
  105. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  106. return backend->policy;
  107. }
  108. static void
  109. host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
  110. {
  111. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  112. backend->policy = policy;
  113. #ifndef CONFIG_NUMA
  114. if (policy != HOST_MEM_POLICY_DEFAULT) {
  115. error_setg(errp, "NUMA policies are not supported by this QEMU");
  116. }
  117. #endif
  118. }
  119. static bool host_memory_backend_get_merge(Object *obj, Error **errp)
  120. {
  121. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  122. return backend->merge;
  123. }
  124. static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
  125. {
  126. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  127. if (!memory_region_size(&backend->mr)) {
  128. backend->merge = value;
  129. return;
  130. }
  131. if (value != backend->merge) {
  132. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  133. uint64_t sz = memory_region_size(&backend->mr);
  134. qemu_madvise(ptr, sz,
  135. value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
  136. backend->merge = value;
  137. }
  138. }
  139. static bool host_memory_backend_get_dump(Object *obj, Error **errp)
  140. {
  141. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  142. return backend->dump;
  143. }
  144. static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
  145. {
  146. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  147. if (!memory_region_size(&backend->mr)) {
  148. backend->dump = value;
  149. return;
  150. }
  151. if (value != backend->dump) {
  152. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  153. uint64_t sz = memory_region_size(&backend->mr);
  154. qemu_madvise(ptr, sz,
  155. value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
  156. backend->dump = value;
  157. }
  158. }
  159. static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
  160. {
  161. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  162. return backend->prealloc || backend->force_prealloc;
  163. }
  164. static void host_memory_backend_set_prealloc(Object *obj, bool value,
  165. Error **errp)
  166. {
  167. Error *local_err = NULL;
  168. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  169. if (backend->force_prealloc) {
  170. if (value) {
  171. error_setg(errp,
  172. "remove -mem-prealloc to use the prealloc property");
  173. return;
  174. }
  175. }
  176. if (!memory_region_size(&backend->mr)) {
  177. backend->prealloc = value;
  178. return;
  179. }
  180. if (value && !backend->prealloc) {
  181. int fd = memory_region_get_fd(&backend->mr);
  182. void *ptr = memory_region_get_ram_ptr(&backend->mr);
  183. uint64_t sz = memory_region_size(&backend->mr);
  184. os_mem_prealloc(fd, ptr, sz, smp_cpus, &local_err);
  185. if (local_err) {
  186. error_propagate(errp, local_err);
  187. return;
  188. }
  189. backend->prealloc = true;
  190. }
  191. }
  192. static void host_memory_backend_init(Object *obj)
  193. {
  194. HostMemoryBackend *backend = MEMORY_BACKEND(obj);
  195. MachineState *machine = MACHINE(qdev_get_machine());
  196. backend->merge = machine_mem_merge(machine);
  197. backend->dump = machine_dump_guest_core(machine);
  198. backend->prealloc = mem_prealloc;
  199. }
  200. MemoryRegion *
  201. host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
  202. {
  203. return memory_region_size(&backend->mr) ? &backend->mr : NULL;
  204. }
  205. void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
  206. {
  207. backend->is_mapped = mapped;
  208. }
  209. bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
  210. {
  211. return backend->is_mapped;
  212. }
  213. static void
  214. host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
  215. {
  216. HostMemoryBackend *backend = MEMORY_BACKEND(uc);
  217. HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
  218. Error *local_err = NULL;
  219. void *ptr;
  220. uint64_t sz;
  221. if (bc->alloc) {
  222. bc->alloc(backend, &local_err);
  223. if (local_err) {
  224. goto out;
  225. }
  226. ptr = memory_region_get_ram_ptr(&backend->mr);
  227. sz = memory_region_size(&backend->mr);
  228. if (backend->merge) {
  229. qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
  230. }
  231. if (!backend->dump) {
  232. qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
  233. }
  234. #ifdef CONFIG_NUMA
  235. unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
  236. /* lastbit == MAX_NODES means maxnode = 0 */
  237. unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
  238. /* ensure policy won't be ignored in case memory is preallocated
  239. * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
  240. * this doesn't catch hugepage case. */
  241. unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
  242. /* check for invalid host-nodes and policies and give more verbose
  243. * error messages than mbind(). */
  244. if (maxnode && backend->policy == MPOL_DEFAULT) {
  245. error_setg(errp, "host-nodes must be empty for policy default,"
  246. " or you should explicitly specify a policy other"
  247. " than default");
  248. return;
  249. } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
  250. error_setg(errp, "host-nodes must be set for policy %s",
  251. HostMemPolicy_lookup[backend->policy]);
  252. return;
  253. }
  254. /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
  255. * as argument to mbind() due to an old Linux bug (feature?) which
  256. * cuts off the last specified node. This means backend->host_nodes
  257. * must have MAX_NODES+1 bits available.
  258. */
  259. assert(sizeof(backend->host_nodes) >=
  260. BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
  261. assert(maxnode <= MAX_NODES);
  262. if (mbind(ptr, sz, backend->policy,
  263. maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
  264. if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
  265. error_setg_errno(errp, errno,
  266. "cannot bind memory to host NUMA nodes");
  267. return;
  268. }
  269. }
  270. #endif
  271. /* Preallocate memory after the NUMA policy has been instantiated.
  272. * This is necessary to guarantee memory is allocated with
  273. * specified NUMA policy in place.
  274. */
  275. if (backend->prealloc) {
  276. os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
  277. smp_cpus, &local_err);
  278. if (local_err) {
  279. goto out;
  280. }
  281. }
  282. }
  283. out:
  284. error_propagate(errp, local_err);
  285. }
  286. static bool
  287. host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
  288. {
  289. if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
  290. return false;
  291. } else {
  292. return true;
  293. }
  294. }
  295. static char *get_id(Object *o, Error **errp)
  296. {
  297. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  298. return g_strdup(backend->id);
  299. }
  300. static void set_id(Object *o, const char *str, Error **errp)
  301. {
  302. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  303. if (backend->id) {
  304. error_setg(errp, "cannot change property value");
  305. return;
  306. }
  307. backend->id = g_strdup(str);
  308. }
  309. static void
  310. host_memory_backend_class_init(ObjectClass *oc, void *data)
  311. {
  312. UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
  313. ucc->complete = host_memory_backend_memory_complete;
  314. ucc->can_be_deleted = host_memory_backend_can_be_deleted;
  315. object_class_property_add_bool(oc, "merge",
  316. host_memory_backend_get_merge,
  317. host_memory_backend_set_merge, &error_abort);
  318. object_class_property_add_bool(oc, "dump",
  319. host_memory_backend_get_dump,
  320. host_memory_backend_set_dump, &error_abort);
  321. object_class_property_add_bool(oc, "prealloc",
  322. host_memory_backend_get_prealloc,
  323. host_memory_backend_set_prealloc, &error_abort);
  324. object_class_property_add(oc, "size", "int",
  325. host_memory_backend_get_size,
  326. host_memory_backend_set_size,
  327. NULL, NULL, &error_abort);
  328. object_class_property_add(oc, "host-nodes", "int",
  329. host_memory_backend_get_host_nodes,
  330. host_memory_backend_set_host_nodes,
  331. NULL, NULL, &error_abort);
  332. object_class_property_add_enum(oc, "policy", "HostMemPolicy",
  333. HostMemPolicy_lookup,
  334. host_memory_backend_get_policy,
  335. host_memory_backend_set_policy, &error_abort);
  336. object_class_property_add_str(oc, "id", get_id, set_id, &error_abort);
  337. }
  338. static void host_memory_backend_finalize(Object *o)
  339. {
  340. HostMemoryBackend *backend = MEMORY_BACKEND(o);
  341. g_free(backend->id);
  342. }
  343. static const TypeInfo host_memory_backend_info = {
  344. .name = TYPE_MEMORY_BACKEND,
  345. .parent = TYPE_OBJECT,
  346. .abstract = true,
  347. .class_size = sizeof(HostMemoryBackendClass),
  348. .class_init = host_memory_backend_class_init,
  349. .instance_size = sizeof(HostMemoryBackend),
  350. .instance_init = host_memory_backend_init,
  351. .instance_finalize = host_memory_backend_finalize,
  352. .interfaces = (InterfaceInfo[]) {
  353. { TYPE_USER_CREATABLE },
  354. { }
  355. }
  356. };
  357. static void register_types(void)
  358. {
  359. type_register_static(&host_memory_backend_info);
  360. }
  361. type_init(register_types);