/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "exec/page-protection.h"
#include "exec/tb-flush.h"
#include "exec/translation-block.h"
#include "qemu.h"
#include "user/page-protection.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
#include "qemu/interval-tree.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;
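
/*
 * The mmap lock is recursive per thread: only the outermost mmap_lock()
 * call acquires the mutex, tracked by the per-thread mmap_lock_count.
 */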
void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Protected by mmap_lock. */
static IntervalTreeRoot shm_regions;

static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}
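
/* Return the last byte of the shm region starting at START, or 0 if none. */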
static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}
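
/* Remove all shm regions wholly contained within [start, last]. */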
static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_RWX) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested. If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit. Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & -host_page_size;
    host_last = ROUND_UP(last, host_page_size) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/*
 * Perform munmap on behalf of the target, with host parameters.
 * If reserved_va, we must replace the memory reservation.
 */
static int do_munmap(void *addr, size_t len)
{
    if (reserved_va) {
        void *ptr = mmap(addr, len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        return ptr == addr ? 0 : -1;
    }
    return munmap(addr, len);
}

/*
 * Perform a pread on behalf of target_mmap. We can reach EOF, we can be
 * interrupted by signals, and in general there's no good error return path.
 * If @zero, zero the rest of the block at EOF.
 * Return true on success.
 */
static bool mmap_pread(int fd, void *p, size_t len, off_t offset, bool zero)
{
    while (1) {
        ssize_t r = pread(fd, p, len, offset);

        if (likely(r == len)) {
            /* Complete */
            return true;
        }
        if (r == 0) {
            /* EOF */
            if (zero) {
                memset(p, 0, len);
            }
            return true;
        }
        if (r > 0) {
            /* Short read */
            p += r;
            len -= r;
            offset += r;
        } else if (errno != EINTR) {
            /* Error */
            return false;
        }
    }
}

/*
 * Map an incomplete host page.
 *
 * Here be dragons. This case will not work if there is an existing
 * overlapping host page, which is file mapped, and for which the mapping
 * is beyond the end of the file. In that case, we will see SIGBUS when
 * trying to write a portion of this page.
 *
 * FIXME: Work around this with a temporary signal handler and longjmp.
 */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map. Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                do_munmap(p, host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else if (!mmap_pread(fd, g2h_untagged(start), last - start + 1,
                           offset, true)) {
        return false;
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, host_page_size, host_prot_new);
    }
    return true;
}

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    int host_page_size = qemu_real_host_page_size();
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= -host_page_size;
    }
    start = ROUND_UP(start, align);
    size = ROUND_UP(size, host_page_size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail. This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory. If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/*
 * Record a successful mmap within the user-exec interval tree.
 */
static abi_long mmap_end(abi_ulong start, abi_ulong last,
                         abi_ulong passthrough_start,
                         abi_ulong passthrough_last,
                         int flags, int page_flags)
{
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
    shm_region_rm_complete(start, last);
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    return start;
}

/*
 * Special case host page size == target page size,
 * where there are no edge conditions.
 */
static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
                            int host_prot, int flags, int page_flags,
                            int fd, off_t offset)
{
    void *p, *want_p = NULL;
    abi_ulong last;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    p = mmap(want_p, len, host_prot, flags, fd, offset);
    if (p == MAP_FAILED) {
        return -1;
    }
    /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */
    if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) {
        do_munmap(p, len);
        errno = EEXIST;
        return -1;
    }

    start = h2g(p);
    last = start + len - 1;
    return mmap_end(start, last, start, last, flags, page_flags);
}

/*
 * Special case host page size < target page size.
 *
 * The two special cases are increased guest alignment, and mapping
 * past the end of a file.
 *
 * When mapping files into a memory area larger than the file,
 * accesses to pages beyond the file size will cause a SIGBUS.
 *
 * For example, if mmapping a file of 100 bytes on a host with 4K
 * pages emulating a target with 8K pages, the target expects to
 * be able to access the first 8K. But the host will trap us on
 * any access beyond 4K.
 *
 * When emulating a target with a larger page size than the host's,
 * we may need to truncate file maps at EOF and add extra anonymous
 * pages up to the target's page boundary.
 *
 * This workaround only works for files that do not change.
 * If the file is later extended (e.g. ftruncate), the SIGBUS
 * vanishes and the proper behaviour is that changes within the
 * anon page should be reflected in the file.
 *
 * However, this case is rather common with executable images,
 * so the workaround is important for even trivial tests, whereas
 * the mmap of a file being extended is less common.
 */
static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
                            int mmap_flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
{
    void *p, *want_p = NULL;
    off_t fileend_adj = 0;
    int flags = mmap_flags;
    abi_ulong last, pass_last;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    if (!(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            return -1;
        }
        if (offset >= sb.st_size) {
            /*
             * The entire map is beyond the end of the file.
             * Transform it to an anonymous mapping.
             */
            flags |= MAP_ANONYMOUS;
            fd = -1;
            offset = 0;
        } else if (offset + len > sb.st_size) {
            /*
             * A portion of the map is beyond the end of the file.
             * Truncate the file portion of the allocation.
             */
            fileend_adj = offset + len - sb.st_size;
        }
    }

    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
        if (fileend_adj) {
            p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
        } else {
            p = mmap(want_p, len, host_prot, flags, fd, offset);
        }
        if (p != want_p) {
            if (p != MAP_FAILED) {
                /* Host does not support MAP_FIXED_NOREPLACE: emulate. */
                do_munmap(p, len);
                errno = EEXIST;
            }
            return -1;
        }

        if (fileend_adj) {
            void *t = mmap(p, len - fileend_adj, host_prot,
                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
                           fd, offset);

            if (t == MAP_FAILED) {
                int save_errno = errno;

                /*
                 * We failed a map over the top of the successful anonymous
                 * mapping above. The only failure mode is running out of VMAs,
                 * and there's nothing that we can do to detect that earlier.
                 * If we have replaced an existing mapping with MAP_FIXED,
                 * then we cannot properly recover. It's a coin toss whether
                 * it would be better to exit or continue here.
                 */
                if (!(flags & MAP_FIXED_NOREPLACE) &&
                    !page_check_range_empty(start, start + len - 1)) {
                    qemu_log("QEMU target_mmap late failure: %s",
                             strerror(save_errno));
                }

                do_munmap(want_p, len);
                errno = save_errno;
                return -1;
            }
        }
    } else {
        size_t host_len, part_len;

        /*
         * Take care to align the host memory. Perform a larger anonymous
         * allocation and extract the aligned portion. Remap the file on
         * top of that.
         */
        host_len = len + TARGET_PAGE_SIZE - host_page_size;
        p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }

        part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
        if (part_len) {
            part_len = TARGET_PAGE_SIZE - part_len;
            do_munmap(p, part_len);
            p += part_len;
            host_len -= part_len;
        }
        if (len < host_len) {
            do_munmap(p + len, host_len - len);
        }

        if (!(flags & MAP_ANONYMOUS)) {
            void *t = mmap(p, len - fileend_adj, host_prot,
                           flags | MAP_FIXED, fd, offset);

            if (t == MAP_FAILED) {
                int save_errno = errno;
                do_munmap(p, len);
                errno = save_errno;
                return -1;
            }
        }

        start = h2g(p);
    }

    last = start + len - 1;
    if (fileend_adj) {
        pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
    } else {
        pass_last = last;
    }
    return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
}

/*
 * Special case host page size > target page size.
 *
 * The two special cases are address and file offsets that are valid
 * for the guest that cannot be directly represented by the host.
 */
static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len,
                            int target_prot, int host_prot,
                            int flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
{
    void *p, *want_p = NULL;
    off_t host_offset = offset & -host_page_size;
    abi_ulong last, real_start, real_last;
    bool misaligned_offset = false;
    size_t host_len;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        /*
         * Adjust the offset to something representable on the host.
         */
        host_len = len + offset - host_offset;
        p = mmap(want_p, host_len, host_prot, flags, fd, host_offset);
        if (p == MAP_FAILED) {
            return -1;
        }

        /* Update start to the file position at offset. */
        p += offset - host_offset;

        start = h2g(p);
        last = start + len - 1;
        return mmap_end(start, last, start, last, flags, page_flags);
    }

    if (!(flags & MAP_ANONYMOUS)) {
        misaligned_offset = (start ^ offset) & (host_page_size - 1);

        /*
         * The fallback for misalignment is a private mapping + read.
         * This carries none of the semantics required of MAP_SHARED.
         */
        if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) {
            errno = EINVAL;
            return -1;
        }
    }

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * Handle the start and end of the mapping.
     */
    if (real_start < start) {
        abi_ulong real_page_last = real_start + host_page_size - 1;
        if (last <= real_page_last) {
            /* Entire allocation a subset of one host page. */
            if (!mmap_frag(real_start, start, last, target_prot,
                           flags, fd, offset)) {
                return -1;
            }
            return mmap_end(start, last, -1, 0, flags, page_flags);
        }

        if (!mmap_frag(real_start, start, real_page_last, target_prot,
                       flags, fd, offset)) {
            return -1;
        }
        real_start = real_page_last + 1;
    }

    if (last < real_last) {
        abi_ulong real_page_start = real_last - host_page_size + 1;
        if (!mmap_frag(real_page_start, real_page_start, last,
                       target_prot, flags, fd,
                       offset + real_page_start - start)) {
            return -1;
        }
        real_last = real_page_start - 1;
    }

    if (real_start > real_last) {
        return mmap_end(start, last, -1, 0, flags, page_flags);
    }

    /*
     * Handle the middle of the mapping.
     */
    host_len = real_last - real_start + 1;
    want_p += real_start - start;

    if (flags & MAP_ANONYMOUS) {
        p = mmap(want_p, host_len, host_prot, flags, -1, 0);
    } else if (!misaligned_offset) {
        p = mmap(want_p, host_len, host_prot, flags, fd,
                 offset + real_start - start);
    } else {
        p = mmap(want_p, host_len, host_prot | PROT_WRITE,
                 flags | MAP_ANONYMOUS, -1, 0);
    }
    if (p != want_p) {
        if (p != MAP_FAILED) {
            do_munmap(p, host_len);
            errno = EEXIST;
        }
        return -1;
    }

    if (misaligned_offset) {
        if (!mmap_pread(fd, p, host_len, offset + real_start - start, false)) {
            do_munmap(p, host_len);
            return -1;
        }
        if (!(host_prot & PROT_WRITE)) {
            mprotect(p, host_len, host_prot);
        }
    }

    return mmap_end(start, last, -1, 0, flags, page_flags);
}

static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
                                    int target_prot, int flags, int page_flags,
                                    int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    int host_prot;

    /*
     * For reserved_va, we are in full control of the allocation.
     * Find a suitable hole and convert to MAP_FIXED.
     */
    if (reserved_va) {
        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, start + len - 1)) {
                errno = EEXIST;
                return -1;
            }
            flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
        } else if (!(flags & MAP_FIXED)) {
            abi_ulong real_start = start & -host_page_size;
            off_t host_offset = offset & -host_page_size;
            size_t real_len = len + offset - host_offset;
            abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE);

            start = mmap_find_vma(real_start, real_len, align);
            if (start == (abi_ulong)-1) {
                errno = ENOMEM;
                return -1;
            }
            start += offset - host_offset;
            flags |= MAP_FIXED;
        }
    }

    host_prot = target_to_host_prot(target_prot);

    if (host_page_size == TARGET_PAGE_SIZE) {
        return mmap_h_eq_g(start, len, host_prot, flags,
                           page_flags, fd, offset);
    } else if (host_page_size < TARGET_PAGE_SIZE) {
        return mmap_h_lt_g(start, len, host_prot, flags,
                           page_flags, fd, offset, host_page_size);
    } else {
        return mmap_h_gt_g(start, len, target_prot, host_prot, flags,
                           page_flags, fd, offset, host_page_size);
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_long ret;
    int page_flags;

    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        return -1;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        return -1;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len || len != (size_t)len) {
        errno = ENOMEM;
        return -1;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            return -1;
        }
        if (!guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            return -1;
        }
    }

    mmap_lock();

    ret = target_mmap__locked(start, len, target_prot, flags,
                              page_flags, fd, offset);

    mmap_unlock();

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations. This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) {
        CPUState *cpu = thread_cpu;
        if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
            tcg_cflags_set(cpu, CF_PARALLEL);
            tb_flush(cpu);
        }
    }

    return ret;
}
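
/*
 * Release the host memory backing the guest range [start, start + len).
 * Host pages that still contain live guest pages outside the range are
 * left untouched; with reserved_va the freed range is re-reserved
 * (via do_munmap) rather than returned to the host.
 */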
static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return 0;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= host_page_size;
        }

        if (real_last < real_start) {
            return 0;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    return do_munmap(host_start, real_len);
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    int ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        errno = EINVAL;
        return -1;
    }

    mmap_lock();
    ret = mmap_reserve_or_unmap(start, len);
    if (likely(ret == 0)) {
        page_set_flags(start, start + len - 1, 0);
        shm_region_rm_complete(start, start + len - 1);
    }
    mmap_unlock();

    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int page_flags = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                page_flags |= page_get_flags(addr);
            }
        }
        if (page_flags == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}

#ifndef TARGET_FORCE_SHMLBA
/*
 * For most architectures, SHMLBA is the same as the page size;
 * some architectures have larger values, in which case they should
 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
 * and defining its own value for SHMLBA.
 *
 * The kernel also permits SHMLBA to be set by the architecture to a
 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
 * this means that addresses are rounded to the large size if
 * SHM_RND is set but addresses not aligned to that size are not rejected
 * as long as they are at least page-aligned. Since the only architecture
 * which uses this is ia64 this code doesn't provide for that oddity.
 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
    return TARGET_PAGE_SIZE;
}
#endif

#if defined(__arm__) || defined(__mips__) || defined(__sparc__)
#define HOST_FORCE_SHMLBA 1
#else
#define HOST_FORCE_SHMLBA 0
#endif

abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
{
    CPUState *cpu = env_cpu(cpu_env);
    struct shmid_ds shm_info;
    int ret;
    int h_pagesize;
    int t_shmlba, h_shmlba, m_shmlba;
    size_t t_len, h_len, m_len;

    /* shmat pointers are always untagged */

    /*
     * Because we can't use host shmat() unless the address is sufficiently
     * aligned for the host, we'll need to check both.
     * TODO: Could be fixed with softmmu.
     */
    t_shmlba = target_shmlba(cpu_env);
    h_pagesize = qemu_real_host_page_size();
    h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize);
    m_shmlba = MAX(t_shmlba, h_shmlba);

    if (shmaddr) {
        if (shmaddr & (m_shmlba - 1)) {
            if (shmflg & SHM_RND) {
                /*
                 * The guest is allowing the kernel to round the address.
                 * Assume that the guest is ok with us rounding to the
                 * host required alignment too. Anyway if we don't, we'll
                 * get an error from the kernel.
                 */
                shmaddr &= ~(m_shmlba - 1);
                if (shmaddr == 0 && (shmflg & SHM_REMAP)) {
                    return -TARGET_EINVAL;
                }
            } else {
                int require = TARGET_PAGE_SIZE;
#ifdef TARGET_FORCE_SHMLBA
                require = t_shmlba;
#endif
                /*
                 * Include host required alignment, as otherwise we cannot
                 * use host shmat at all.
                 */
                require = MAX(require, h_shmlba);
                if (shmaddr & (require - 1)) {
                    return -TARGET_EINVAL;
                }
            }
        }
    } else {
        if (shmflg & SHM_REMAP) {
            return -TARGET_EINVAL;
        }
    }
    /* All rounding now manually concluded. */
    shmflg &= ~SHM_RND;

    /* Find out the length of the shared memory segment. */
    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
    if (is_error(ret)) {
        /* can't get length, bail out */
        return ret;
    }
    t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz);
    h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize);
    m_len = MAX(t_len, h_len);

    if (!guest_range_valid_untagged(shmaddr, m_len)) {
        return -TARGET_EINVAL;
    }

    WITH_MMAP_LOCK_GUARD() {
        bool mapped = false;
        void *want, *test;
        abi_ulong last;

        if (!shmaddr) {
            shmaddr = mmap_find_vma(0, m_len, m_shmlba);
            if (shmaddr == -1) {
                return -TARGET_ENOMEM;
            }
            mapped = !reserved_va;
        } else if (shmflg & SHM_REMAP) {
            /*
             * If host page size > target page size, the host shmat may map
             * more memory than the guest expects. Reject a mapping that
             * would replace memory in the unexpected gap.
             * TODO: Could be fixed with softmmu.
             */
            if (t_len < h_len &&
                !page_check_range_empty(shmaddr + t_len,
                                        shmaddr + h_len - 1)) {
                return -TARGET_EINVAL;
            }
        } else {
            if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) {
                return -TARGET_EINVAL;
            }
        }

        /* All placement is now complete. */
        want = (void *)g2h_untagged(shmaddr);

        /*
         * Map anonymous pages across the entire range, then remap with
         * the shared memory. This is required for a number of corner
         * cases for which host and guest page sizes differ.
         */
        if (h_len != t_len) {
            int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE);
            int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS
                       | (reserved_va || mapped || (shmflg & SHM_REMAP)
                          ? MAP_FIXED : MAP_FIXED_NOREPLACE);

            test = mmap(want, m_len, mmap_p, mmap_f, -1, 0);
            if (unlikely(test != want)) {
                /* shmat returns EINVAL not EEXIST like mmap. */
                ret = (test == MAP_FAILED && errno != EEXIST
                       ? get_errno(-1) : -TARGET_EINVAL);
                if (mapped) {
                    do_munmap(want, m_len);
                }
                return ret;
            }
            mapped = true;
        }

        if (reserved_va || mapped) {
            shmflg |= SHM_REMAP;
        }
        test = shmat(shmid, want, shmflg);
        if (test == MAP_FAILED) {
            ret = get_errno(-1);
            if (mapped) {
                do_munmap(want, m_len);
            }
            return ret;
        }
        assert(test == want);

        last = shmaddr + m_len - 1;
        page_set_flags(shmaddr, last,
                       PAGE_VALID | PAGE_RESET | PAGE_READ |
                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) |
                       (shmflg & SHM_EXEC ? PAGE_EXEC : 0));

        shm_region_rm_complete(shmaddr, last);
        shm_region_add(shmaddr, last);
    }

    /*
     * We're mapping shared memory, so ensure we generate code for parallel
     * execution and flush old translations. This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
        tcg_cflags_set(cpu, CF_PARALLEL);
        tb_flush(cpu);
    }

    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following shmat\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    return shmaddr;
}
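
/* Detach the shm segment attached at SHMADDR and drop its guest mapping. */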
abi_long target_shmdt(abi_ulong shmaddr)
{
    abi_long rv;

    /* shmdt pointers are always untagged */

    WITH_MMAP_LOCK_GUARD() {
        abi_ulong last = shm_region_find(shmaddr);

        if (last == 0) {
            return -TARGET_EINVAL;
        }

        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
        if (rv == 0) {
            abi_ulong size = last - shmaddr + 1;

            page_set_flags(shmaddr, last, 0);
            shm_region_rm_complete(shmaddr, last);
            mmap_reserve_or_unmap(shmaddr, size);
        }
    }
    return rv;
}