/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "exec/page-protection.h"
#include "exec/translation-block.h"
#include "qemu.h"
#include "user/page-protection.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
#include "qemu/interval-tree.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0 ? true : false;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count)
        abort();
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Protected by mmap_lock. */
static IntervalTreeRoot shm_regions;

static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}

static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}

static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_RWX) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & -host_page_size;
    host_last = ROUND_UP(last, host_page_size) - 1;
    nranges = 0;

    mmap_lock();
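    /*
     * Illustrative example (hypothetical numbers): with 4 KiB guest pages on
     * a 16 KiB-page host, changing the protection of guest pages
     * 0x1000-0x2fff affects the whole host page 0x0000-0x3fff, so that host
     * page must receive the union of the new protection and the existing
     * flags of the guest pages at 0x0000 and 0x3000.
     */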
    if (host_last - host_start < host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/*
 * Perform munmap on behalf of the target, with host parameters.
 * If reserved_va, we must replace the memory reservation.
 */
static int do_munmap(void *addr, size_t len)
{
    if (reserved_va) {
        void *ptr = mmap(addr, len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        return ptr == addr ? 0 : -1;
    }
    return munmap(addr, len);
}

/*
 * Perform a pread on behalf of target_mmap.  We can reach EOF, we can be
 * interrupted by signals, and in general there's no good error return path.
 * If @zero, zero the rest of the block at EOF.
 * Return true on success.
 */
static bool mmap_pread(int fd, void *p, size_t len, off_t offset, bool zero)
{
    while (1) {
        ssize_t r = pread(fd, p, len, offset);

        if (likely(r == len)) {
            /* Complete */
            return true;
        }
        if (r == 0) {
            /* EOF */
            if (zero) {
                memset(p, 0, len);
            }
            return true;
        }
        if (r > 0) {
            /* Short read */
            p += r;
            len -= r;
            offset += r;
        } else if (errno != EINTR) {
            /* Error */
            return false;
        }
    }
}

/*
 * Map an incomplete host page.
 *
 * Here be dragons.  This case will not work if there is an existing
 * overlapping host page, which is file mapped, and for which the mapping
 * is beyond the end of the file.  In that case, we will see SIGBUS when
 * trying to write a portion of this page.
 *
 * FIXME: Work around this with a temporary signal handler and longjmp.
 */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                do_munmap(p, host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }

    prot_new = prot | prot_old;
    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else if (!mmap_pread(fd, g2h_untagged(start), last - start + 1,
                           offset, true)) {
        return false;
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, host_page_size, host_prot_new);
    }

    return true;
}

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    int host_page_size = qemu_real_host_page_size();
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= -host_page_size;
    }
    start = ROUND_UP(start, align);
    size = ROUND_UP(size, host_page_size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

/*
 * Record a successful mmap within the user-exec interval tree.
 */
static abi_long mmap_end(abi_ulong start, abi_ulong last,
                         abi_ulong passthrough_start,
                         abi_ulong passthrough_last,
                         int flags, int page_flags)
{
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
    shm_region_rm_complete(start, last);
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    return start;
}

/*
 * Special case host page size == target page size,
 * where there are no edge conditions.
 */
static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
                            int host_prot, int flags, int page_flags,
                            int fd, off_t offset)
{
    void *p, *want_p = NULL;
    abi_ulong last;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    p = mmap(want_p, len, host_prot, flags, fd, offset);
    if (p == MAP_FAILED) {
        return -1;
    }
    /* If the host kernel does not support MAP_FIXED_NOREPLACE, emulate. */
    if ((flags & MAP_FIXED_NOREPLACE) && p != want_p) {
        do_munmap(p, len);
        errno = EEXIST;
        return -1;
    }

    start = h2g(p);
    last = start + len - 1;
    return mmap_end(start, last, start, last, flags, page_flags);
}

/*
 * Special case host page size < target page size.
 *
 * The two special cases are increased guest alignment, and mapping
 * past the end of a file.
 *
 * When mapping files into a memory area larger than the file,
 * accesses to pages beyond the file size will cause a SIGBUS.
 *
 * For example, if mmapping a file of 100 bytes on a host with 4K
 * pages emulating a target with 8K pages, the target expects to
 * be able to access the first 8K.  But the host will trap us on
 * any access beyond 4K.
 *
 * When emulating a target with a larger page size than the host's,
 * we may need to truncate file maps at EOF and add extra anonymous
 * pages up to the target's page boundary.
 *
 * This workaround only works for files that do not change.
 * If the file is later extended (e.g. ftruncate), the SIGBUS
 * vanishes and the proper behaviour is that changes within the
 * anon page should be reflected in the file.
 *
 * However, this case is rather common with executable images,
 * so the workaround is important for even trivial tests, whereas
 * the mmap of a file being extended is less common.
 */
static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
                            int mmap_flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
{
    void *p, *want_p = NULL;
    off_t fileend_adj = 0;
    int flags = mmap_flags;
    abi_ulong last, pass_last;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }

    if (!(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            return -1;
        }
        if (offset >= sb.st_size) {
            /*
             * The entire map is beyond the end of the file.
             * Transform it to an anonymous mapping.
             */
            flags |= MAP_ANONYMOUS;
            fd = -1;
            offset = 0;
        } else if (offset + len > sb.st_size) {
            /*
             * A portion of the map is beyond the end of the file.
             * Truncate the file portion of the allocation.
             */
            fileend_adj = offset + len - sb.st_size;
        }
    }

    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
        if (fileend_adj) {
            p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
        } else {
            p = mmap(want_p, len, host_prot, flags, fd, offset);
        }
        if (p != want_p) {
            if (p != MAP_FAILED) {
                /* Host does not support MAP_FIXED_NOREPLACE: emulate. */
                do_munmap(p, len);
                errno = EEXIST;
            }
            return -1;
        }

        if (fileend_adj) {
            void *t = mmap(p, len - fileend_adj, host_prot,
                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
                           fd, offset);

            if (t == MAP_FAILED) {
                int save_errno = errno;

                /*
                 * We failed a map over the top of the successful anonymous
                 * mapping above. The only failure mode is running out of VMAs,
                 * and there's nothing that we can do to detect that earlier.
                 * If we have replaced an existing mapping with MAP_FIXED,
                 * then we cannot properly recover.  It's a coin toss whether
                 * it would be better to exit or continue here.
                 */
                if (!(flags & MAP_FIXED_NOREPLACE) &&
                    !page_check_range_empty(start, start + len - 1)) {
                    qemu_log("QEMU target_mmap late failure: %s",
                             strerror(save_errno));
                }

                do_munmap(want_p, len);
                errno = save_errno;
                return -1;
            }
        }
    } else {
        size_t host_len, part_len;

        /*
         * Take care to align the host memory.  Perform a larger anonymous
         * allocation and extract the aligned portion.  Remap the file on
         * top of that.
         */
        host_len = len + TARGET_PAGE_SIZE - host_page_size;
        p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }

        part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
        if (part_len) {
            part_len = TARGET_PAGE_SIZE - part_len;
            do_munmap(p, part_len);
            p += part_len;
            host_len -= part_len;
        }
        if (len < host_len) {
            do_munmap(p + len, host_len - len);
        }

        if (!(flags & MAP_ANONYMOUS)) {
            void *t = mmap(p, len - fileend_adj, host_prot,
                           flags | MAP_FIXED, fd, offset);

            if (t == MAP_FAILED) {
                int save_errno = errno;
                do_munmap(p, len);
                errno = save_errno;
                return -1;
            }
        }

        start = h2g(p);
    }

    last = start + len - 1;
    if (fileend_adj) {
        pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
    } else {
        pass_last = last;
    }
    return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
}

/*
 * Special case host page size > target page size.
 *
 * The two special cases are address and file offsets that are valid
 * for the guest that cannot be directly represented by the host.
 */
static abi_long mmap_h_gt_g(abi_ulong start, abi_ulong len,
                            int target_prot, int host_prot,
                            int flags, int page_flags, int fd,
                            off_t offset, int host_page_size)
{
    void *p, *want_p = NULL;
    off_t host_offset = offset & -host_page_size;
    abi_ulong last, real_start, real_last;
    bool misaligned_offset = false;
    size_t host_len;

    if (start || (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        want_p = g2h_untagged(start);
    }
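    /*
     * Illustrative example (hypothetical numbers): with 64 KiB host pages,
     * 4 KiB target pages and a requested offset of 0x3000, host_offset is 0.
     * The non-fixed path below maps len + 0x3000 bytes at host offset 0 and
     * then advances p by 0x3000, so the returned guest address corresponds
     * to the requested file offset.
     */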
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        /*
         * Adjust the offset to something representable on the host.
         */
        host_len = len + offset - host_offset;
        p = mmap(want_p, host_len, host_prot, flags, fd, host_offset);
        if (p == MAP_FAILED) {
            return -1;
        }

        /* Update start to the file position at offset. */
        p += offset - host_offset;

        start = h2g(p);
        last = start + len - 1;
        return mmap_end(start, last, start, last, flags, page_flags);
    }

    if (!(flags & MAP_ANONYMOUS)) {
        misaligned_offset = (start ^ offset) & (host_page_size - 1);
        /*
         * The fallback for misalignment is a private mapping + read.
         * This carries none of the semantics required of MAP_SHARED.
         */
        if (misaligned_offset && (flags & MAP_TYPE) != MAP_PRIVATE) {
            errno = EINVAL;
            return -1;
        }
    }

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * Handle the start and end of the mapping.
     */
    if (real_start < start) {
        abi_ulong real_page_last = real_start + host_page_size - 1;
        if (last <= real_page_last) {
            /* Entire allocation a subset of one host page. */
            if (!mmap_frag(real_start, start, last, target_prot,
                           flags, fd, offset)) {
                return -1;
            }
            return mmap_end(start, last, -1, 0, flags, page_flags);
        }

        if (!mmap_frag(real_start, start, real_page_last, target_prot,
                       flags, fd, offset)) {
            return -1;
        }
        real_start = real_page_last + 1;
    }

    if (last < real_last) {
        abi_ulong real_page_start = real_last - host_page_size + 1;
        if (!mmap_frag(real_page_start, real_page_start, last,
                       target_prot, flags, fd,
                       offset + real_page_start - start)) {
            return -1;
        }
        real_last = real_page_start - 1;
    }

    if (real_start > real_last) {
        return mmap_end(start, last, -1, 0, flags, page_flags);
    }

    /*
     * Handle the middle of the mapping.
     */
    host_len = real_last - real_start + 1;
    want_p += real_start - start;

    if (flags & MAP_ANONYMOUS) {
        p = mmap(want_p, host_len, host_prot, flags, -1, 0);
    } else if (!misaligned_offset) {
        p = mmap(want_p, host_len, host_prot, flags, fd,
                 offset + real_start - start);
    } else {
        p = mmap(want_p, host_len, host_prot | PROT_WRITE,
                 flags | MAP_ANONYMOUS, -1, 0);
    }
    if (p != want_p) {
        if (p != MAP_FAILED) {
            do_munmap(p, host_len);
            errno = EEXIST;
        }
        return -1;
    }

    if (misaligned_offset) {
        if (!mmap_pread(fd, p, host_len, offset + real_start - start, false)) {
            do_munmap(p, host_len);
            return -1;
        }
        if (!(host_prot & PROT_WRITE)) {
            mprotect(p, host_len, host_prot);
        }
    }

    return mmap_end(start, last, -1, 0, flags, page_flags);
}

static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
                                    int target_prot, int flags, int page_flags,
                                    int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    int host_prot;

    /*
     * For reserved_va, we are in full control of the allocation.
     * Find a suitable hole and convert to MAP_FIXED.
     */
    if (reserved_va) {
        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, start + len - 1)) {
                errno = EEXIST;
                return -1;
            }
            flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
        } else if (!(flags & MAP_FIXED)) {
            abi_ulong real_start = start & -host_page_size;
            off_t host_offset = offset & -host_page_size;
            size_t real_len = len + offset - host_offset;
            abi_ulong align = MAX(host_page_size, TARGET_PAGE_SIZE);

            start = mmap_find_vma(real_start, real_len, align);
            if (start == (abi_ulong)-1) {
                errno = ENOMEM;
                return -1;
            }
            start += offset - host_offset;
            flags |= MAP_FIXED;
        }
    }

    host_prot = target_to_host_prot(target_prot);

    if (host_page_size == TARGET_PAGE_SIZE) {
        return mmap_h_eq_g(start, len, host_prot, flags,
                           page_flags, fd, offset);
    } else if (host_page_size < TARGET_PAGE_SIZE) {
        return mmap_h_lt_g(start, len, host_prot, flags,
                           page_flags, fd, offset, host_page_size);
    } else {
        return mmap_h_gt_g(start, len, target_prot, host_prot, flags,
                           page_flags, fd, offset, host_page_size);
    }
}

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_long ret;
    int page_flags;

    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        return -1;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        return -1;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len || len != (size_t)len) {
        errno = ENOMEM;
        return -1;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            return -1;
        }
        if (!guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            return -1;
        }
    }

    mmap_lock();

    ret = target_mmap__locked(start, len, target_prot, flags,
                              page_flags, fd, offset);

    mmap_unlock();

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (ret != -1 && (flags & MAP_TYPE) != MAP_PRIVATE) {
        CPUState *cpu = thread_cpu;
        if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
            tcg_cflags_set(cpu, CF_PARALLEL);
            tb_flush(cpu);
        }
    }

    return ret;
}

static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
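    /*
     * For example (hypothetical numbers): with 16 KiB host pages and 4 KiB
     * guest pages, if the first host page still contains a live guest page
     * below 'start', that host page is kept and real_start advances by one
     * host page; likewise real_last retreats if guest pages remain beyond
     * 'last'.  If nothing is left to release at host granularity, the
     * function returns 0 without unmapping anything.
     */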
    if (real_last - real_start < host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return 0;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= host_page_size;
        }

        if (real_last < real_start) {
            return 0;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    return do_munmap(host_start, real_len);
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    int ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        errno = EINVAL;
        return -1;
    }

    mmap_lock();
    ret = mmap_reserve_or_unmap(start, len);
    if (likely(ret == 0)) {
        page_set_flags(start, start + len - 1, 0);
        shm_region_rm_complete(start, start + len - 1);
    }
    mmap_unlock();

    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();
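    /*
     * Three cases follow: with MREMAP_FIXED the mapping is moved to the
     * guest-chosen address; with MREMAP_MAYMOVE a new guest range is found
     * via mmap_find_vma and the host mremap is forced to it with
     * MREMAP_FIXED; otherwise the mapping is resized in place, which with
     * reserved_va is only permitted if the pages being grown into are
     * currently unmapped in the guest.
     */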
    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int page_flags = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                page_flags |= page_get_flags(addr);
            }
        }
        if (page_flags == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}

#ifndef TARGET_FORCE_SHMLBA
/*
 * For most architectures, SHMLBA is the same as the page size;
 * some architectures have larger values, in which case they should
 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
 * and defining its own value for SHMLBA.
 *
 * The kernel also permits SHMLBA to be set by the architecture to a
 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
 * this means that addresses are rounded to the large size if
 * SHM_RND is set but addresses not aligned to that size are not rejected
 * as long as they are at least page-aligned.  Since the only architecture
 * which uses this is ia64 this code doesn't provide for that oddity.
 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
    return TARGET_PAGE_SIZE;
}
#endif

#if defined(__arm__) || defined(__mips__) || defined(__sparc__)
#define HOST_FORCE_SHMLBA 1
#else
#define HOST_FORCE_SHMLBA 0
#endif
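
/*
 * For example, 32-bit Arm hosts define SHMLBA as several pages to avoid
 * cache aliasing, so attach addresses must honour that larger host
 * alignment even when the guest only requires page alignment.
 */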

abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
{
    CPUState *cpu = env_cpu(cpu_env);
    struct shmid_ds shm_info;
    int ret;
    int h_pagesize;
    int t_shmlba, h_shmlba, m_shmlba;
    size_t t_len, h_len, m_len;

    /* shmat pointers are always untagged */

    /*
     * Because we can't use host shmat() unless the address is sufficiently
     * aligned for the host, we'll need to check both.
     * TODO: Could be fixed with softmmu.
     */
    t_shmlba = target_shmlba(cpu_env);
    h_pagesize = qemu_real_host_page_size();
    h_shmlba = (HOST_FORCE_SHMLBA ? SHMLBA : h_pagesize);
    m_shmlba = MAX(t_shmlba, h_shmlba);

    if (shmaddr) {
        if (shmaddr & (m_shmlba - 1)) {
            if (shmflg & SHM_RND) {
                /*
                 * The guest is allowing the kernel to round the address.
                 * Assume that the guest is ok with us rounding to the
                 * host required alignment too.  Anyway if we don't, we'll
                 * get an error from the kernel.
                 */
                shmaddr &= ~(m_shmlba - 1);
                if (shmaddr == 0 && (shmflg & SHM_REMAP)) {
                    return -TARGET_EINVAL;
                }
            } else {
                int require = TARGET_PAGE_SIZE;
#ifdef TARGET_FORCE_SHMLBA
                require = t_shmlba;
#endif
                /*
                 * Include host required alignment, as otherwise we cannot
                 * use host shmat at all.
                 */
                require = MAX(require, h_shmlba);
                if (shmaddr & (require - 1)) {
                    return -TARGET_EINVAL;
                }
            }
        }
    } else {
        if (shmflg & SHM_REMAP) {
            return -TARGET_EINVAL;
        }
    }
    /* All rounding now manually concluded. */
    shmflg &= ~SHM_RND;

    /* Find out the length of the shared memory segment. */
    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
    if (is_error(ret)) {
        /* can't get length, bail out */
        return ret;
    }

    t_len = TARGET_PAGE_ALIGN(shm_info.shm_segsz);
    h_len = ROUND_UP(shm_info.shm_segsz, h_pagesize);
    m_len = MAX(t_len, h_len);
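    /*
     * For example (hypothetical numbers): a 5000-byte segment with 4 KiB
     * target pages and 64 KiB host pages gives t_len = 8 KiB and
     * h_len = m_len = 64 KiB; with 64 KiB target pages and 4 KiB host pages
     * it gives t_len = m_len = 64 KiB and h_len = 8 KiB.
     */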
    if (!guest_range_valid_untagged(shmaddr, m_len)) {
        return -TARGET_EINVAL;
    }

    WITH_MMAP_LOCK_GUARD() {
        bool mapped = false;
        void *want, *test;
        abi_ulong last;

        if (!shmaddr) {
            shmaddr = mmap_find_vma(0, m_len, m_shmlba);
            if (shmaddr == -1) {
                return -TARGET_ENOMEM;
            }
            mapped = !reserved_va;
        } else if (shmflg & SHM_REMAP) {
            /*
             * If host page size > target page size, the host shmat may map
             * more memory than the guest expects.  Reject a mapping that
             * would replace memory in the unexpected gap.
             * TODO: Could be fixed with softmmu.
             */
            if (t_len < h_len &&
                !page_check_range_empty(shmaddr + t_len,
                                        shmaddr + h_len - 1)) {
                return -TARGET_EINVAL;
            }
        } else {
            if (!page_check_range_empty(shmaddr, shmaddr + m_len - 1)) {
                return -TARGET_EINVAL;
            }
        }

        /* All placement is now complete. */
        want = (void *)g2h_untagged(shmaddr);

        /*
         * Map anonymous pages across the entire range, then remap with
         * the shared memory.  This is required for a number of corner
         * cases for which host and guest page sizes differ.
         */
        if (h_len != t_len) {
            int mmap_p = PROT_READ | (shmflg & SHM_RDONLY ? 0 : PROT_WRITE);
            int mmap_f = MAP_PRIVATE | MAP_ANONYMOUS
                       | (reserved_va || mapped || (shmflg & SHM_REMAP)
                          ? MAP_FIXED : MAP_FIXED_NOREPLACE);

            test = mmap(want, m_len, mmap_p, mmap_f, -1, 0);
            if (unlikely(test != want)) {
                /* shmat returns EINVAL not EEXIST like mmap. */
                ret = (test == MAP_FAILED && errno != EEXIST
                       ? get_errno(-1) : -TARGET_EINVAL);
                if (mapped) {
                    do_munmap(want, m_len);
                }
                return ret;
            }
            mapped = true;
        }

        if (reserved_va || mapped) {
            shmflg |= SHM_REMAP;
        }
        test = shmat(shmid, want, shmflg);
        if (test == MAP_FAILED) {
            ret = get_errno(-1);
            if (mapped) {
                do_munmap(want, m_len);
            }
            return ret;
        }
        assert(test == want);

        last = shmaddr + m_len - 1;
        page_set_flags(shmaddr, last,
                       PAGE_VALID | PAGE_RESET | PAGE_READ |
                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE) |
                       (shmflg & SHM_EXEC ? PAGE_EXEC : 0));

        shm_region_rm_complete(shmaddr, last);
        shm_region_add(shmaddr, last);
    }

    /*
     * We're mapping shared memory, so ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (!tcg_cflags_has(cpu, CF_PARALLEL)) {
        tcg_cflags_set(cpu, CF_PARALLEL);
        tb_flush(cpu);
    }

    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following shmat\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    return shmaddr;
}

abi_long target_shmdt(abi_ulong shmaddr)
{
    abi_long rv;

    /* shmdt pointers are always untagged */

    WITH_MMAP_LOCK_GUARD() {
        abi_ulong last = shm_region_find(shmaddr);
        if (last == 0) {
            return -TARGET_EINVAL;
        }

        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
        if (rv == 0) {
            abi_ulong size = last - shmaddr + 1;

            page_set_flags(shmaddr, last, 0);
            shm_region_rm_complete(shmaddr, last);
            mmap_reserve_or_unmap(shmaddr, size);
        }
    }
    return rv;
}