mmap.c
/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child)
        pthread_mutex_init(&mmap_mutex, NULL);
    else
        pthread_mutex_unlock(&mmap_mutex);
}
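
/*
 * Note (not part of the original source): mmap_fork_start() runs in the
 * parent before fork() so that no other thread can hold mmap_mutex while
 * the address-space bookkeeping is duplicated.  After fork() the child
 * re-initializes the mutex, since the thread that locked it does not exist
 * in the child, while the parent simply drops the lock again.
 */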

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
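
/*
 * Worked example (illustrative, not part of the original source): a guest
 * request with prot = PROT_READ | PROT_EXEC on a non-AArch64 target yields
 *   *host_prot = PROT_READ                        (exec is never granted to the host)
 *   page_flags = PAGE_READ | PAGE_EXEC | PAGE_VALID
 * whereas a prot containing any bit outside 'valid' fails the final
 * "prot & ~valid" test and the function returns 0, which the callers
 * report to the guest as EINVAL.
 */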

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len - 1, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end)
            prot1 |= page_get_flags(addr);
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return -1;
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED &&
            (prot & PROT_WRITE))
            return -1;

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
            return -1;

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE))
            mprotect(host_start, qemu_host_page_size, prot_new);
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}
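
/*
 * Illustration (not part of the original source): mmap_frag() only matters
 * when qemu_host_page_size > TARGET_PAGE_SIZE, e.g. a 4 KiB-page guest on a
 * 64 KiB-page host.  A guest mapping that covers only part of one host page
 * cannot be mapped directly: the code keeps (or creates) an anonymous host
 * page, widens its protection if needed, pread()s the file data into just
 * the guest range, and then restores the union of the old and new
 * protections so target pages outside [start, end) keep their access rights.
 */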

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va + 1 - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va + 1 - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
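
/*
 * Note (illustrative, not part of the original source): with reserved_va
 * the whole guest address space was reserved with one host mmap() at
 * startup, so no host probe is needed here; free space is found purely
 * from QEMU's own page flags.  The "addr > end_addr" test relies on
 * abi_ulong wrap-around: once addr underflows past 0 it becomes larger
 * than end_addr, which triggers the single restart from the top before
 * the search is declared a failure.
 */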

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below. */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page.  */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead.  */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail.  */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory.  */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
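
/*
 * Summary (illustrative, not part of the original source): without
 * reserved_va the only reliable way to find guest-usable space is to ask
 * the host kernel for a PROT_NONE probe mapping and check whether the
 * returned address both fits the guest address space (h2g_valid) and meets
 * the requested alignment.  The probe is left in place on success so the
 * caller can replace it race-free with MAP_FIXED / MREMAP_FIXED / SHM_REMAP,
 * and is munmap()ed before every retry otherwise.
 */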

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below.  */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmaping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K. But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary.  */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1)
            goto fail;

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF.  */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address. It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1)
                goto fail;
            if (pread(fd, g2h_untagged(start), len, offset) == -1)
                goto fail;
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1)
                    goto fail;
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1)
                goto fail;
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1)
                goto fail;
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS)
                offset1 = 0;
            else
                offset1 = offset + real_start - start;
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED)
                goto fail;
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len - 1, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end - 1,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len - 1, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
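
/*
 * Worked sketch (illustrative, not part of the original source): for a
 * MAP_FIXED file mapping whose start and end are not host-page aligned
 * (possible when TARGET_PAGE_SIZE < qemu_host_page_size), the code above
 * splits the request into up to three pieces:
 *   - mmap_frag() for the partial host page at the start, filled by pread();
 *   - one direct host mmap() for the fully covered host pages in the middle,
 *     later marked PAGE_PASSTHROUGH;
 *   - mmap_frag() for the partial host page at the end, if any.
 * Only the middle piece keeps real file-mapping semantics on the host; the
 * fragments are backed by anonymous host pages holding copied file data.
 */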

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}
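
/*
 * Rationale (not part of the original source): when the guest address space
 * was reserved at startup (reserved_va != 0), regions the guest unmaps must
 * not be returned to the host with munmap(), or the host could later hand
 * them out for unrelated allocations inside the guest range.  Re-mapping
 * them as PROT_NONE anonymous memory keeps the reservation intact while
 * still making the pages inaccessible; host pages shared with still-live
 * neighbouring target pages are skipped.
 */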

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK)
        return -TARGET_EINVAL;
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0)
            real_start += qemu_host_page_size;
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0)
            real_end -= qemu_host_page_size;
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len - 1, 0);
    }
    mmap_unlock();
    return ret;
}
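
/*
 * Note (not part of the original source): QEMU's page flags are only
 * cleared when the host-side operation succeeded.  With reserved_va the
 * "unmap" is really mmap_reserve(), which reports no error, so ret stays 0
 * and the guest view is always updated in that configuration.
 */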

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL. */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail freed by shrinking the mapping. */
                    mmap_reserve(old_addr + new_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}
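
/*
 * Note (illustrative, not part of the original source): each branch keeps
 * the reserved_va bookkeeping in step with the host mremap(): whenever the
 * old region, or its freed tail when shrinking, is released on the host
 * side, it is immediately re-covered with PROT_NONE memory via
 * mmap_reserve() so the startup reservation of the guest address space
 * never develops holes.
 */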

static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest. In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
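
/*
 * Example (illustrative, not part of the original source): a guest
 * madvise(addr, len, MADV_DONTNEED) on a region that target_mmap() mapped
 * straight through to the host (PAGE_PASSTHROUGH and host-page aligned) is
 * forwarded to the host madvise() and, on success, the per-page target data
 * is reset.  The same call on a region QEMU emulated with anonymous memory
 * plus pread() returns 0 without doing anything, while MADV_WIPEONFORK /
 * MADV_KEEPONFORK on such a region fail with EINVAL because they cannot be
 * emulated safely.
 */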