/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <stdarg.h>
  24. #include <string.h>
  25. #include <unistd.h>
  26. #include <errno.h>
  27. #include <sys/types.h>
  28. #include <sys/stat.h>
  29. #include <sys/mman.h>
  30. #include <linux/mman.h>
  31. #include <linux/unistd.h>
  32. #include "qemu.h"
  33. #include "qemu-common.h"
  34. //#define DEBUG_MMAP
  35. #if defined(USE_NPTL)
  36. pthread_mutex_t mmap_mutex;
  37. static int __thread mmap_lock_count;
  38. void mmap_lock(void)
  39. {
  40. if (mmap_lock_count++ == 0) {
  41. pthread_mutex_lock(&mmap_mutex);
  42. }
  43. }
  44. void mmap_unlock(void)
  45. {
  46. if (--mmap_lock_count == 0) {
  47. pthread_mutex_unlock(&mmap_mutex);
  48. }
  49. }
  50. /* Grab lock to make sure things are in a consistent state after fork(). */
  51. void mmap_fork_start(void)
  52. {
  53. if (mmap_lock_count)
  54. abort();
  55. pthread_mutex_lock(&mmap_mutex);
  56. }
  57. void mmap_fork_end(int child)
  58. {
  59. if (child)
  60. pthread_mutex_init(&mmap_mutex, NULL);
  61. else
  62. pthread_mutex_unlock(&mmap_mutex);
  63. }
  64. #else
  65. /* We aren't threadsafe to start with, so no need to worry about locking. */
  66. void mmap_lock(void)
  67. {
  68. }
  69. void mmap_unlock(void)
  70. {
  71. }
  72. #endif
  73. void *qemu_vmalloc(size_t size)
  74. {
  75. void *p;
  76. unsigned long addr;
  77. mmap_lock();
  78. /* Use map and mark the pages as used. */
  79. p = mmap(NULL, size, PROT_READ | PROT_WRITE,
  80. MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  81. addr = (unsigned long)p;
  82. if (addr == (target_ulong) addr) {
  83. /* Allocated region overlaps guest address space.
  84. This may recurse. */
  85. page_set_flags(addr & TARGET_PAGE_MASK, TARGET_PAGE_ALIGN(addr + size),
  86. PAGE_RESERVED);
  87. }
  88. mmap_unlock();
  89. return p;
  90. }
  91. void *qemu_malloc(size_t size)
  92. {
  93. char * p;
  94. size += 16;
  95. p = qemu_vmalloc(size);
  96. *(size_t *)p = size;
  97. return p + 16;
  98. }
  99. /* We use map, which is always zero initialized. */
  100. void * qemu_mallocz(size_t size)
  101. {
  102. return qemu_malloc(size);
  103. }
  104. void qemu_free(void *ptr)
  105. {
  106. /* FIXME: We should unmark the reserved pages here. However this gets
  107. complicated when one target page spans multiple host pages, so we
  108. don't bother. */
  109. size_t *p;
  110. p = (size_t *)((char *)ptr - 16);
  111. munmap(p, *p);
  112. }
  113. void *qemu_realloc(void *ptr, size_t size)
  114. {
  115. size_t old_size, copy;
  116. void *new_ptr;
  117. if (!ptr)
  118. return qemu_malloc(size);
  119. old_size = *(size_t *)((char *)ptr - 16);
  120. copy = old_size < size ? old_size : size;
  121. new_ptr = qemu_malloc(size);
  122. memcpy(new_ptr, ptr, copy);
  123. qemu_free(ptr);
  124. return new_ptr;
  125. }
  126. /* NOTE: all the constants are the HOST ones, but addresses are target. */
  127. int target_mprotect(abi_ulong start, abi_ulong len, int prot)
  128. {
  129. abi_ulong end, host_start, host_end, addr;
  130. int prot1, ret;
  131. #ifdef DEBUG_MMAP
  132. printf("mprotect: start=0x" TARGET_FMT_lx
  133. "len=0x" TARGET_FMT_lx " prot=%c%c%c\n", start, len,
  134. prot & PROT_READ ? 'r' : '-',
  135. prot & PROT_WRITE ? 'w' : '-',
  136. prot & PROT_EXEC ? 'x' : '-');
  137. #endif
  138. if ((start & ~TARGET_PAGE_MASK) != 0)
  139. return -EINVAL;
  140. len = TARGET_PAGE_ALIGN(len);
  141. end = start + len;
  142. if (end < start)
  143. return -EINVAL;
  144. prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
  145. if (len == 0)
  146. return 0;
  147. mmap_lock();
  148. host_start = start & qemu_host_page_mask;
  149. host_end = HOST_PAGE_ALIGN(end);
  150. if (start > host_start) {
  151. /* handle host page containing start */
  152. prot1 = prot;
  153. for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
  154. prot1 |= page_get_flags(addr);
  155. }
  156. if (host_end == host_start + qemu_host_page_size) {
  157. for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
  158. prot1 |= page_get_flags(addr);
  159. }
  160. end = host_end;
  161. }
  162. ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
  163. if (ret != 0)
  164. goto error;
  165. host_start += qemu_host_page_size;
  166. }
  167. if (end < host_end) {
  168. prot1 = prot;
  169. for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
  170. prot1 |= page_get_flags(addr);
  171. }
  172. ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
  173. prot1 & PAGE_BITS);
  174. if (ret != 0)
  175. goto error;
  176. host_end -= qemu_host_page_size;
  177. }
  178. /* handle the pages in the middle */
  179. if (host_start < host_end) {
  180. ret = mprotect(g2h(host_start), host_end - host_start, prot);
  181. if (ret != 0)
  182. goto error;
  183. }
  184. page_set_flags(start, start + len, prot | PAGE_VALID);
  185. mmap_unlock();
  186. return 0;
  187. error:
  188. mmap_unlock();
  189. return ret;
  190. }
  191. /* map an incomplete host page */
  192. static int mmap_frag(abi_ulong real_start,
  193. abi_ulong start, abi_ulong end,
  194. int prot, int flags, int fd, abi_ulong offset)
  195. {
  196. abi_ulong real_end, addr;
  197. void *host_start;
  198. int prot1, prot_new;
  199. real_end = real_start + qemu_host_page_size;
  200. host_start = g2h(real_start);
  201. /* get the protection of the target pages outside the mapping */
  202. prot1 = 0;
  203. for(addr = real_start; addr < real_end; addr++) {
  204. if (addr < start || addr >= end)
  205. prot1 |= page_get_flags(addr);
  206. }
  207. if (prot1 == 0) {
  208. /* no page was there, so we allocate one */
  209. void *p = mmap(host_start, qemu_host_page_size, prot,
  210. flags | MAP_ANONYMOUS, -1, 0);
  211. if (p == MAP_FAILED)
  212. return -1;
  213. prot1 = prot;
  214. }
  215. prot1 &= PAGE_BITS;
  216. prot_new = prot | prot1;
  217. if (!(flags & MAP_ANONYMOUS)) {
  218. /* msync() won't work here, so we return an error if write is
  219. possible while it is a shared mapping */
  220. if ((flags & MAP_TYPE) == MAP_SHARED &&
  221. (prot & PROT_WRITE))
  222. return -EINVAL;
  223. /* adjust protection to be able to read */
  224. if (!(prot1 & PROT_WRITE))
  225. mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
  226. /* read the corresponding file data */
  227. pread(fd, g2h(start), end - start, offset);
  228. /* put final protection */
  229. if (prot_new != (prot1 | PROT_WRITE))
  230. mprotect(host_start, qemu_host_page_size, prot_new);
  231. } else {
  232. /* just update the protection */
  233. if (prot_new != prot1) {
  234. mprotect(host_start, qemu_host_page_size, prot_new);
  235. }
  236. }
  237. return 0;
  238. }
  239. #if defined(__CYGWIN__)
  240. /* Cygwin doesn't have a whole lot of address space. */
  241. static abi_ulong mmap_next_start = 0x18000000;
  242. #else
  243. static abi_ulong mmap_next_start = 0x40000000;
  244. #endif
  245. unsigned long last_brk;
  246. /* find a free memory area of size 'size'. The search starts at
  247. 'start'. If 'start' == 0, then a default start address is used.
  248. Return -1 if error.
  249. */
  250. /* page_init() marks pages used by the host as reserved to be sure not
  251. to use them. */
  252. static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
  253. {
  254. abi_ulong addr, addr1, addr_start;
  255. int prot;
  256. unsigned long new_brk;
  257. new_brk = (unsigned long)sbrk(0);
  258. if (last_brk && last_brk < new_brk && last_brk == (target_ulong)last_brk) {
  259. /* This is a hack to catch the host allocating memory with brk().
  260. If it uses mmap then we loose.
  261. FIXME: We really want to avoid the host allocating memory in
  262. the first place, and maybe leave some slack to avoid switching
  263. to mmap. */
  264. page_set_flags(last_brk & TARGET_PAGE_MASK,
  265. TARGET_PAGE_ALIGN(new_brk),
  266. PAGE_RESERVED);
  267. }
  268. last_brk = new_brk;
  269. size = HOST_PAGE_ALIGN(size);
  270. start = start & qemu_host_page_mask;
  271. addr = start;
  272. if (addr == 0)
  273. addr = mmap_next_start;
  274. addr_start = addr;
  275. for(;;) {
  276. prot = 0;
  277. for(addr1 = addr; addr1 < (addr + size); addr1 += TARGET_PAGE_SIZE) {
  278. prot |= page_get_flags(addr1);
  279. }
  280. if (prot == 0)
  281. break;
  282. addr += qemu_host_page_size;
  283. /* we found nothing */
  284. if (addr == addr_start)
  285. return (abi_ulong)-1;
  286. }
  287. if (start == 0)
  288. mmap_next_start = addr + size;
  289. return addr;
  290. }
  291. /* NOTE: all the constants are the HOST ones */
  292. abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
  293. int flags, int fd, abi_ulong offset)
  294. {
  295. abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
  296. unsigned long host_start;
  297. mmap_lock();
  298. #ifdef DEBUG_MMAP
  299. {
  300. printf("mmap: start=0x" TARGET_FMT_lx
  301. " len=0x" TARGET_FMT_lx " prot=%c%c%c flags=",
  302. start, len,
  303. prot & PROT_READ ? 'r' : '-',
  304. prot & PROT_WRITE ? 'w' : '-',
  305. prot & PROT_EXEC ? 'x' : '-');
  306. if (flags & MAP_FIXED)
  307. printf("MAP_FIXED ");
  308. if (flags & MAP_ANONYMOUS)
  309. printf("MAP_ANON ");
  310. switch(flags & MAP_TYPE) {
  311. case MAP_PRIVATE:
  312. printf("MAP_PRIVATE ");
  313. break;
  314. case MAP_SHARED:
  315. printf("MAP_SHARED ");
  316. break;
  317. default:
  318. printf("[MAP_TYPE=0x%x] ", flags & MAP_TYPE);
  319. break;
  320. }
  321. printf("fd=%d offset=" TARGET_FMT_lx "\n", fd, offset);
  322. }
  323. #endif
  324. if (offset & ~TARGET_PAGE_MASK) {
  325. errno = EINVAL;
  326. goto fail;
  327. }
  328. len = TARGET_PAGE_ALIGN(len);
  329. if (len == 0)
  330. goto the_end;
  331. real_start = start & qemu_host_page_mask;
  332. /* When mapping files into a memory area larger than the file, accesses
  333. to pages beyond the file size will cause a SIGBUS.
  334. For example, if mmaping a file of 100 bytes on a host with 4K pages
  335. emulating a target with 8K pages, the target expects to be able to
  336. access the first 8K. But the host will trap us on any access beyond
  337. 4K.
  338. When emulating a target with a larger page-size than the hosts, we
  339. may need to truncate file maps at EOF and add extra anonymous pages
  340. up to the targets page boundary. */
  341. if ((qemu_real_host_page_size < TARGET_PAGE_SIZE)
  342. && !(flags & MAP_ANONYMOUS)) {
  343. struct stat sb;
  344. if (fstat (fd, &sb) == -1)
  345. goto fail;
  346. /* Are we trying to create a map beyond EOF?. */
  347. if (offset + len > sb.st_size) {
  348. /* If so, truncate the file map at eof aligned with
  349. the hosts real pagesize. Additional anonymous maps
  350. will be created beyond EOF. */
  351. len = (sb.st_size - offset);
  352. len += qemu_real_host_page_size - 1;
  353. len &= ~(qemu_real_host_page_size - 1);
  354. }
  355. }
  356. if (!(flags & MAP_FIXED)) {
  357. abi_ulong mmap_start;
  358. void *p;
  359. host_offset = offset & qemu_host_page_mask;
  360. host_len = len + offset - host_offset;
  361. host_len = HOST_PAGE_ALIGN(host_len);
  362. mmap_start = mmap_find_vma(real_start, host_len);
  363. if (mmap_start == (abi_ulong)-1) {
  364. errno = ENOMEM;
  365. goto fail;
  366. }
  367. /* Note: we prefer to control the mapping address. It is
  368. especially important if qemu_host_page_size >
  369. qemu_real_host_page_size */
  370. p = mmap(g2h(mmap_start),
  371. host_len, prot, flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
  372. if (p == MAP_FAILED)
  373. goto fail;
  374. /* update start so that it points to the file position at 'offset' */
  375. host_start = (unsigned long)p;
  376. if (!(flags & MAP_ANONYMOUS)) {
  377. p = mmap(g2h(mmap_start), len, prot,
  378. flags | MAP_FIXED, fd, host_offset);
  379. host_start += offset - host_offset;
  380. }
  381. start = h2g(host_start);
  382. } else {
  383. int flg;
  384. target_ulong addr;
  385. if (start & ~TARGET_PAGE_MASK) {
  386. errno = EINVAL;
  387. goto fail;
  388. }
  389. end = start + len;
  390. real_end = HOST_PAGE_ALIGN(end);
  391. /*
  392. * Test if requested memory area fits target address space
  393. * It can fail only on 64-bit host with 32-bit target.
  394. * On any other target/host host mmap() handles this error correctly.
  395. */
  396. if ((unsigned long)start + len - 1 > (abi_ulong) -1) {
  397. errno = EINVAL;
  398. goto fail;
  399. }
  400. for(addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) {
  401. flg = page_get_flags(addr);
  402. if (flg & PAGE_RESERVED) {
  403. errno = ENXIO;
  404. goto fail;
  405. }
  406. }
  407. /* worst case: we cannot map the file because the offset is not
  408. aligned, so we read it */
  409. if (!(flags & MAP_ANONYMOUS) &&
  410. (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
  411. /* msync() won't work here, so we return an error if write is
  412. possible while it is a shared mapping */
  413. if ((flags & MAP_TYPE) == MAP_SHARED &&
  414. (prot & PROT_WRITE)) {
  415. errno = EINVAL;
  416. goto fail;
  417. }
  418. retaddr = target_mmap(start, len, prot | PROT_WRITE,
  419. MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
  420. -1, 0);
  421. if (retaddr == -1)
  422. goto fail;
  423. pread(fd, g2h(start), len, offset);
  424. if (!(prot & PROT_WRITE)) {
  425. ret = target_mprotect(start, len, prot);
  426. if (ret != 0) {
  427. start = ret;
  428. goto the_end;
  429. }
  430. }
  431. goto the_end;
  432. }
  433. /* handle the start of the mapping */
  434. if (start > real_start) {
  435. if (real_end == real_start + qemu_host_page_size) {
  436. /* one single host page */
  437. ret = mmap_frag(real_start, start, end,
  438. prot, flags, fd, offset);
  439. if (ret == -1)
  440. goto fail;
  441. goto the_end1;
  442. }
  443. ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
  444. prot, flags, fd, offset);
  445. if (ret == -1)
  446. goto fail;
  447. real_start += qemu_host_page_size;
  448. }
  449. /* handle the end of the mapping */
  450. if (end < real_end) {
  451. ret = mmap_frag(real_end - qemu_host_page_size,
  452. real_end - qemu_host_page_size, real_end,
  453. prot, flags, fd,
  454. offset + real_end - qemu_host_page_size - start);
  455. if (ret == -1)
  456. goto fail;
  457. real_end -= qemu_host_page_size;
  458. }
  459. /* map the middle (easier) */
  460. if (real_start < real_end) {
  461. void *p;
  462. unsigned long offset1;
  463. if (flags & MAP_ANONYMOUS)
  464. offset1 = 0;
  465. else
  466. offset1 = offset + real_start - start;
  467. p = mmap(g2h(real_start), real_end - real_start,
  468. prot, flags, fd, offset1);
  469. if (p == MAP_FAILED)
  470. goto fail;
  471. }
  472. }
  473. the_end1:
  474. page_set_flags(start, start + len, prot | PAGE_VALID);
  475. the_end:
  476. #ifdef DEBUG_MMAP
  477. printf("ret=0x" TARGET_FMT_lx "\n", start);
  478. page_dump(stdout);
  479. printf("\n");
  480. #endif
  481. mmap_unlock();
  482. return start;
  483. fail:
  484. mmap_unlock();
  485. return -1;
  486. }
  487. int target_munmap(abi_ulong start, abi_ulong len)
  488. {
  489. abi_ulong end, real_start, real_end, addr;
  490. int prot, ret;
  491. #ifdef DEBUG_MMAP
  492. printf("munmap: start=0x%lx len=0x%lx\n", start, len);
  493. #endif
  494. if (start & ~TARGET_PAGE_MASK)
  495. return -EINVAL;
  496. len = TARGET_PAGE_ALIGN(len);
  497. if (len == 0)
  498. return -EINVAL;
  499. mmap_lock();
  500. end = start + len;
  501. real_start = start & qemu_host_page_mask;
  502. real_end = HOST_PAGE_ALIGN(end);
  503. if (start > real_start) {
  504. /* handle host page containing start */
  505. prot = 0;
  506. for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
  507. prot |= page_get_flags(addr);
  508. }
  509. if (real_end == real_start + qemu_host_page_size) {
  510. for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
  511. prot |= page_get_flags(addr);
  512. }
  513. end = real_end;
  514. }
  515. if (prot != 0)
  516. real_start += qemu_host_page_size;
  517. }
  518. if (end < real_end) {
  519. prot = 0;
  520. for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
  521. prot |= page_get_flags(addr);
  522. }
  523. if (prot != 0)
  524. real_end -= qemu_host_page_size;
  525. }
  526. ret = 0;
  527. /* unmap what we can */
  528. if (real_start < real_end) {
  529. ret = munmap(g2h(real_start), real_end - real_start);
  530. }
  531. if (ret == 0)
  532. page_set_flags(start, start + len, 0);
  533. mmap_unlock();
  534. return ret;
  535. }
  536. abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
  537. abi_ulong new_size, unsigned long flags,
  538. abi_ulong new_addr)
  539. {
  540. int prot;
  541. void *host_addr;
  542. mmap_lock();
  543. if (flags & MREMAP_FIXED)
  544. host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
  545. old_size, new_size,
  546. flags,
  547. new_addr);
  548. else if (flags & MREMAP_MAYMOVE) {
  549. abi_ulong mmap_start;
  550. mmap_start = mmap_find_vma(0, new_size);
  551. if (mmap_start == -1) {
  552. errno = ENOMEM;
  553. host_addr = MAP_FAILED;
  554. } else
  555. host_addr = (void *) syscall(__NR_mremap, g2h(old_addr),
  556. old_size, new_size,
  557. flags | MREMAP_FIXED,
  558. g2h(mmap_start));
  559. } else {
  560. host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
  561. /* Check if address fits target address space */
  562. if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
  563. /* Revert mremap() changes */
  564. host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
  565. errno = ENOMEM;
  566. host_addr = MAP_FAILED;
  567. }
  568. }
  569. if (host_addr == MAP_FAILED) {
  570. new_addr = -1;
  571. } else {
  572. new_addr = h2g(host_addr);
  573. prot = page_get_flags(old_addr);
  574. page_set_flags(old_addr, old_addr + old_size, 0);
  575. page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
  576. }
  577. mmap_unlock();
  578. return new_addr;
  579. }
  580. int target_msync(abi_ulong start, abi_ulong len, int flags)
  581. {
  582. abi_ulong end;
  583. if (start & ~TARGET_PAGE_MASK)
  584. return -EINVAL;
  585. len = TARGET_PAGE_ALIGN(len);
  586. end = start + len;
  587. if (end < start)
  588. return -EINVAL;
  589. if (end == start)
  590. return 0;
  591. start &= qemu_host_page_mask;
  592. return msync(g2h(start), end - start, flags);
  593. }