2
0

mmap-alloc.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. /*
  2. * Support for RAM backed by mmaped host memory.
  3. *
  4. * Copyright (c) 2015 Red Hat, Inc.
  5. *
  6. * Authors:
  7. * Michael S. Tsirkin <mst@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or
  10. * later. See the COPYING file in the top-level directory.
  11. */
  12. #ifdef CONFIG_LINUX
  13. #include <linux/mman.h>
  14. #else /* !CONFIG_LINUX */
  15. #define MAP_SYNC 0x0
  16. #define MAP_SHARED_VALIDATE 0x0
  17. #endif /* CONFIG_LINUX */
  18. #include "qemu/osdep.h"
  19. #include "qemu/mmap-alloc.h"
  20. #include "qemu/host-utils.h"
  21. #define HUGETLBFS_MAGIC 0x958458f6
  22. #ifdef CONFIG_LINUX
  23. #include <sys/vfs.h>
  24. #endif
  25. size_t qemu_fd_getpagesize(int fd)
  26. {
  27. #ifdef CONFIG_LINUX
  28. struct statfs fs;
  29. int ret;
  30. if (fd != -1) {
  31. do {
  32. ret = fstatfs(fd, &fs);
  33. } while (ret != 0 && errno == EINTR);
  34. if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
  35. return fs.f_bsize;
  36. }
  37. }
  38. #ifdef __sparc__
  39. /* SPARC Linux needs greater alignment than the pagesize */
  40. return QEMU_VMALLOC_ALIGN;
  41. #endif
  42. #endif
  43. return qemu_real_host_page_size;
  44. }
  45. size_t qemu_mempath_getpagesize(const char *mem_path)
  46. {
  47. #ifdef CONFIG_LINUX
  48. struct statfs fs;
  49. int ret;
  50. if (mem_path) {
  51. do {
  52. ret = statfs(mem_path, &fs);
  53. } while (ret != 0 && errno == EINTR);
  54. if (ret != 0) {
  55. fprintf(stderr, "Couldn't statfs() memory path: %s\n",
  56. strerror(errno));
  57. exit(1);
  58. }
  59. if (fs.f_type == HUGETLBFS_MAGIC) {
  60. /* It's hugepage, return the huge page size */
  61. return fs.f_bsize;
  62. }
  63. }
  64. #ifdef __sparc__
  65. /* SPARC Linux needs greater alignment than the pagesize */
  66. return QEMU_VMALLOC_ALIGN;
  67. #endif
  68. #endif
  69. return qemu_real_host_page_size;
  70. }
  71. void *qemu_ram_mmap(int fd,
  72. size_t size,
  73. size_t align,
  74. bool shared,
  75. bool is_pmem)
  76. {
  77. int flags;
  78. int map_sync_flags = 0;
  79. int guardfd;
  80. size_t offset;
  81. size_t pagesize;
  82. size_t total;
  83. void *guardptr;
  84. void *ptr;
  85. /*
  86. * Note: this always allocates at least one extra page of virtual address
  87. * space, even if size is already aligned.
  88. */
  89. total = size + align;
  90. #if defined(__powerpc64__) && defined(__linux__)
  91. /* On ppc64 mappings in the same segment (aka slice) must share the same
  92. * page size. Since we will be re-allocating part of this segment
  93. * from the supplied fd, we should make sure to use the same page size, to
  94. * this end we mmap the supplied fd. In this case, set MAP_NORESERVE to
  95. * avoid allocating backing store memory.
  96. * We do this unless we are using the system page size, in which case
  97. * anonymous memory is OK.
  98. */
  99. flags = MAP_PRIVATE;
  100. pagesize = qemu_fd_getpagesize(fd);
  101. if (fd == -1 || pagesize == qemu_real_host_page_size) {
  102. guardfd = -1;
  103. flags |= MAP_ANONYMOUS;
  104. } else {
  105. guardfd = fd;
  106. flags |= MAP_NORESERVE;
  107. }
  108. #else
  109. guardfd = -1;
  110. pagesize = qemu_real_host_page_size;
  111. flags = MAP_PRIVATE | MAP_ANONYMOUS;
  112. #endif
  113. guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
  114. if (guardptr == MAP_FAILED) {
  115. return MAP_FAILED;
  116. }
  117. assert(is_power_of_2(align));
  118. /* Always align to host page size */
  119. assert(align >= pagesize);
  120. flags = MAP_FIXED;
  121. flags |= fd == -1 ? MAP_ANONYMOUS : 0;
  122. flags |= shared ? MAP_SHARED : MAP_PRIVATE;
  123. if (shared && is_pmem) {
  124. map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
  125. }
  126. offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
  127. ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
  128. flags | map_sync_flags, fd, 0);
  129. if (ptr == MAP_FAILED && map_sync_flags) {
  130. if (errno == ENOTSUP) {
  131. char *proc_link, *file_name;
  132. int len;
  133. proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
  134. file_name = g_malloc0(PATH_MAX);
  135. len = readlink(proc_link, file_name, PATH_MAX - 1);
  136. if (len < 0) {
  137. len = 0;
  138. }
  139. file_name[len] = '\0';
  140. fprintf(stderr, "Warning: requesting persistence across crashes "
  141. "for backend file %s failed. Proceeding without "
  142. "persistence, data might become corrupted in case of host "
  143. "crash.\n", file_name);
  144. g_free(proc_link);
  145. g_free(file_name);
  146. }
  147. /*
  148. * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
  149. * we will remove these flags to handle compatibility.
  150. */
  151. ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
  152. flags, fd, 0);
  153. }
  154. if (ptr == MAP_FAILED) {
  155. munmap(guardptr, total);
  156. return MAP_FAILED;
  157. }
  158. if (offset > 0) {
  159. munmap(guardptr, offset);
  160. }
  161. /*
  162. * Leave a single PROT_NONE page allocated after the RAM block, to serve as
  163. * a guard page guarding against potential buffer overflows.
  164. */
  165. total -= offset;
  166. if (total > size + pagesize) {
  167. munmap(ptr + size + pagesize, total - size - pagesize);
  168. }
  169. return ptr;
  170. }
  171. void qemu_ram_munmap(int fd, void *ptr, size_t size)
  172. {
  173. size_t pagesize;
  174. if (ptr) {
  175. /* Unmap both the RAM block and the guard page */
  176. #if defined(__powerpc64__) && defined(__linux__)
  177. pagesize = qemu_fd_getpagesize(fd);
  178. #else
  179. pagesize = qemu_real_host_page_size;
  180. #endif
  181. munmap(ptr, size + pagesize);
  182. }
  183. }