oslib-posix.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744
  1. /*
  2. * os-posix-lib.c
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2010 Red Hat, Inc.
  6. *
  7. * QEMU library functions on POSIX which are shared between QEMU and
  8. * the QEMU tools.
  9. *
  10. * Permission is hereby granted, free of charge, to any person obtaining a copy
  11. * of this software and associated documentation files (the "Software"), to deal
  12. * in the Software without restriction, including without limitation the rights
  13. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14. * copies of the Software, and to permit persons to whom the Software is
  15. * furnished to do so, subject to the following conditions:
  16. *
  17. * The above copyright notice and this permission notice shall be included in
  18. * all copies or substantial portions of the Software.
  19. *
  20. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26. * THE SOFTWARE.
  27. */
  28. #include "qemu/osdep.h"
  29. #include <termios.h>
  30. #include <glib/gprintf.h>
  31. #include "sysemu/sysemu.h"
  32. #include "trace.h"
  33. #include "qapi/error.h"
  34. #include "qemu/error-report.h"
  35. #include "qemu/madvise.h"
  36. #include "qemu/sockets.h"
  37. #include "qemu/thread.h"
  38. #include <libgen.h>
  39. #include "qemu/cutils.h"
  40. #include "qemu/units.h"
  41. #include "qemu/thread-context.h"
  42. #ifdef CONFIG_LINUX
  43. #include <sys/syscall.h>
  44. #endif
  45. #ifdef __FreeBSD__
  46. #include <sys/thr.h>
  47. #include <sys/user.h>
  48. #include <libutil.h>
  49. #endif
  50. #ifdef __NetBSD__
  51. #include <lwp.h>
  52. #endif
  53. #include "qemu/mmap-alloc.h"
  54. #define MAX_MEM_PREALLOC_THREAD_COUNT 16
  55. struct MemsetThread;
  56. typedef struct MemsetContext {
  57. bool all_threads_created;
  58. bool any_thread_failed;
  59. struct MemsetThread *threads;
  60. int num_threads;
  61. } MemsetContext;
  62. struct MemsetThread {
  63. char *addr;
  64. size_t numpages;
  65. size_t hpagesize;
  66. QemuThread pgthread;
  67. sigjmp_buf env;
  68. MemsetContext *context;
  69. };
  70. typedef struct MemsetThread MemsetThread;
  71. /* used by sigbus_handler() */
  72. static MemsetContext *sigbus_memset_context;
  73. struct sigaction sigbus_oldact;
  74. static QemuMutex sigbus_mutex;
  75. static QemuMutex page_mutex;
  76. static QemuCond page_cond;
  /*
   * Return an identifier for the calling thread, using the host-specific
   * primitive where one is known and falling back to the process id.
   */
  int qemu_get_thread_id(void)
  {
      int self;

  #if defined(__linux__)
      self = syscall(SYS_gettid);
  #elif defined(__FreeBSD__)
      /* thread id is up to INT_MAX */
      long lwpid;

      thr_self(&lwpid);
      self = (int)lwpid;
  #elif defined(__NetBSD__)
      self = _lwp_self();
  #elif defined(__OpenBSD__)
      self = getthrid();
  #else
      self = getpid();
  #endif

      return self;
  }
  /*
   * Forward to daemon(3) with the caller's nochdir/noclose flags and
   * hand back its return value unchanged.
   */
  int qemu_daemon(int nochdir, int noclose)
  {
      int rc = daemon(nochdir, noclose);

      return rc;
  }
  98. bool qemu_write_pidfile(const char *path, Error **errp)
  99. {
  100. int fd;
  101. char pidstr[32];
  102. while (1) {
  103. struct stat a, b;
  104. struct flock lock = {
  105. .l_type = F_WRLCK,
  106. .l_whence = SEEK_SET,
  107. .l_len = 0,
  108. };
  109. fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp);
  110. if (fd == -1) {
  111. return false;
  112. }
  113. if (fstat(fd, &b) < 0) {
  114. error_setg_errno(errp, errno, "Cannot stat file");
  115. goto fail_close;
  116. }
  117. if (fcntl(fd, F_SETLK, &lock)) {
  118. error_setg_errno(errp, errno, "Cannot lock pid file");
  119. goto fail_close;
  120. }
  121. /*
  122. * Now make sure the path we locked is the same one that now
  123. * exists on the filesystem.
  124. */
  125. if (stat(path, &a) < 0) {
  126. /*
  127. * PID file disappeared, someone else must be racing with
  128. * us, so try again.
  129. */
  130. close(fd);
  131. continue;
  132. }
  133. if (a.st_ino == b.st_ino) {
  134. break;
  135. }
  136. /*
  137. * PID file was recreated, someone else must be racing with
  138. * us, so try again.
  139. */
  140. close(fd);
  141. }
  142. if (ftruncate(fd, 0) < 0) {
  143. error_setg_errno(errp, errno, "Failed to truncate pid file");
  144. goto fail_unlink;
  145. }
  146. snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
  147. if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
  148. error_setg(errp, "Failed to write pid file");
  149. goto fail_unlink;
  150. }
  151. return true;
  152. fail_unlink:
  153. unlink(path);
  154. fail_close:
  155. close(fd);
  156. return false;
  157. }
  158. /* alloc shared memory pages */
  159. void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
  160. bool noreserve)
  161. {
  162. const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
  163. (noreserve ? QEMU_MAP_NORESERVE : 0);
  164. size_t align = QEMU_VMALLOC_ALIGN;
  165. void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
  166. if (ptr == MAP_FAILED) {
  167. return NULL;
  168. }
  169. if (alignment) {
  170. *alignment = align;
  171. }
  172. trace_qemu_anon_ram_alloc(size, ptr);
  173. return ptr;
  174. }
  /*
   * Trace and then unmap, via qemu_ram_munmap(), the @size bytes at @ptr.
   */
  void qemu_anon_ram_free(void *ptr, size_t size)
  {
      trace_qemu_anon_ram_free(ptr, size);

      qemu_ram_munmap(-1, ptr, size);
  }
  /*
   * Ask glib to clear non-blocking mode on @fd.  The result is not
   * checked, so failures are silently ignored.
   */
  void qemu_socket_set_block(int fd)
  {
      (void)g_unix_set_fd_nonblocking(fd, false, NULL);
  }
  /*
   * Ask glib to put @fd into non-blocking mode.
   *
   * Returns 0 on success, or the negated errno when the request fails.
   */
  int qemu_socket_try_set_nonblock(int fd)
  {
      if (g_unix_set_fd_nonblocking(fd, true, NULL)) {
          return 0;
      }

      return -errno;
  }
  /*
   * Put @fd into non-blocking mode; a failure trips an assertion.
   */
  void qemu_socket_set_nonblock(int fd)
  {
      int rc = qemu_socket_try_set_nonblock(fd);

      assert(rc == 0);
  }
  /*
   * Enable SO_REUSEADDR on @fd.  A setsockopt() failure trips an
   * assertion; otherwise the (zero) setsockopt() result is returned.
   */
  int socket_set_fast_reuse(int fd)
  {
      int enable = 1;
      int rc;

      rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable));
      assert(rc == 0);

      return rc;
  }
  /*
   * Add FD_CLOEXEC to the descriptor flags of @fd, keeping any flags that
   * are already set.  Either fcntl() call failing trips an assertion.
   */
  void qemu_set_cloexec(int fd)
  {
      int fdflags = fcntl(fd, F_GETFD);
      int rc;

      assert(fdflags != -1);

      rc = fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC);
      assert(rc != -1);
  }
  /*
   * socketpair() variant whose descriptors are marked close-on-exec.
   *
   * Where SOCK_CLOEXEC exists it is tried first; only an EINVAL failure
   * of that attempt falls through to a plain socketpair() followed by
   * qemu_set_cloexec() on both ends.
   *
   * Returns whatever the deciding socketpair() call returned.
   */
  int qemu_socketpair(int domain, int type, int protocol, int sv[2])
  {
      int rc;

  #ifdef SOCK_CLOEXEC
      rc = socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
      if (!(rc == -1 && errno == EINVAL)) {
          return rc;
      }
  #endif

      rc = socketpair(domain, type, protocol, sv);
      if (rc != 0) {
          return rc;
      }

      qemu_set_cloexec(sv[0]);
      qemu_set_cloexec(sv[1]);
      return rc;
  }
  226. char *
  227. qemu_get_local_state_dir(void)
  228. {
  229. return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
  230. }
  /*
   * Switch the ECHO, ECHONL, ICANON and IEXTEN local-mode flags of the
   * terminal behind @fd on (@echo true) or off (@echo false).
   *
   * This is best effort: if the current attributes cannot be read (for
   * example because @fd is not a terminal) nothing is changed, and a
   * tcsetattr() failure is not reported.
   */
  void qemu_set_tty_echo(int fd, bool echo)
  {
      const tcflag_t echo_bits = ECHO | ECHONL | ICANON | IEXTEN;
      struct termios tty;

      if (tcgetattr(fd, &tty) != 0) {
          /*
           * tty was not filled in; modifying and writing it back would
           * operate on uninitialized data.
           */
          return;
      }

      if (echo) {
          tty.c_lflag |= echo_bits;
      } else {
          tty.c_lflag &= ~echo_bits;
      }

      tcsetattr(fd, TCSANOW, &tty);
  }
  242. #ifdef CONFIG_LINUX
  243. static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
  244. #else /* CONFIG_LINUX */
  245. static void sigbus_handler(int signal)
  246. #endif /* CONFIG_LINUX */
  247. {
  248. int i;
  249. if (sigbus_memset_context) {
  250. for (i = 0; i < sigbus_memset_context->num_threads; i++) {
  251. MemsetThread *thread = &sigbus_memset_context->threads[i];
  252. if (qemu_thread_is_self(&thread->pgthread)) {
  253. siglongjmp(thread->env, 1);
  254. }
  255. }
  256. }
  257. #ifdef CONFIG_LINUX
  258. /*
  259. * We assume that the MCE SIGBUS handler could have been registered. We
  260. * should never receive BUS_MCEERR_AO on any of our threads, but only on
  261. * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
  262. * receive BUS_MCEERR_AR triggered by action of other threads on one of
  263. * our threads. So, no need to check for unrelated SIGBUS when seeing one
  264. * for our threads.
  265. *
  266. * We will forward to the MCE handler, which will either handle the SIGBUS
  267. * or reinstall the default SIGBUS handler and reraise the SIGBUS. The
  268. * default SIGBUS handler will crash the process, so we don't care.
  269. */
  270. if (sigbus_oldact.sa_flags & SA_SIGINFO) {
  271. sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
  272. return;
  273. }
  274. #endif /* CONFIG_LINUX */
  275. warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
  276. }
  277. static void *do_touch_pages(void *arg)
  278. {
  279. MemsetThread *memset_args = (MemsetThread *)arg;
  280. sigset_t set, oldset;
  281. int ret = 0;
  282. /*
  283. * On Linux, the page faults from the loop below can cause mmap_sem
  284. * contention with allocation of the thread stacks. Do not start
  285. * clearing until all threads have been created.
  286. */
  287. qemu_mutex_lock(&page_mutex);
  288. while (!memset_args->context->all_threads_created) {
  289. qemu_cond_wait(&page_cond, &page_mutex);
  290. }
  291. qemu_mutex_unlock(&page_mutex);
  292. /* unblock SIGBUS */
  293. sigemptyset(&set);
  294. sigaddset(&set, SIGBUS);
  295. pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
  296. if (sigsetjmp(memset_args->env, 1)) {
  297. ret = -EFAULT;
  298. } else {
  299. char *addr = memset_args->addr;
  300. size_t numpages = memset_args->numpages;
  301. size_t hpagesize = memset_args->hpagesize;
  302. size_t i;
  303. for (i = 0; i < numpages; i++) {
  304. /*
  305. * Read & write back the same value, so we don't
  306. * corrupt existing user/app data that might be
  307. * stored.
  308. *
  309. * 'volatile' to stop compiler optimizing this away
  310. * to a no-op
  311. */
  312. *(volatile char *)addr = *addr;
  313. addr += hpagesize;
  314. }
  315. }
  316. pthread_sigmask(SIG_SETMASK, &oldset, NULL);
  317. return (void *)(uintptr_t)ret;
  318. }
  319. static void *do_madv_populate_write_pages(void *arg)
  320. {
  321. MemsetThread *memset_args = (MemsetThread *)arg;
  322. const size_t size = memset_args->numpages * memset_args->hpagesize;
  323. char * const addr = memset_args->addr;
  324. int ret = 0;
  325. /* See do_touch_pages(). */
  326. qemu_mutex_lock(&page_mutex);
  327. while (!memset_args->context->all_threads_created) {
  328. qemu_cond_wait(&page_cond, &page_mutex);
  329. }
  330. qemu_mutex_unlock(&page_mutex);
  331. if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
  332. ret = -errno;
  333. }
  334. return (void *)(uintptr_t)ret;
  335. }
  336. static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
  337. int max_threads)
  338. {
  339. long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
  340. int ret = 1;
  341. if (host_procs > 0) {
  342. ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
  343. }
  344. /* Especially with gigantic pages, don't create more threads than pages. */
  345. ret = MIN(ret, numpages);
  346. /* Don't start threads to prealloc comparatively little memory. */
  347. ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
  348. /* In case sysconf() fails, we fall back to single threaded */
  349. return ret;
  350. }
  351. static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
  352. int max_threads, ThreadContext *tc,
  353. bool use_madv_populate_write)
  354. {
  355. static gsize initialized = 0;
  356. MemsetContext context = {
  357. .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
  358. };
  359. size_t numpages_per_thread, leftover;
  360. void *(*touch_fn)(void *);
  361. int ret = 0, i = 0;
  362. char *addr = area;
  363. if (g_once_init_enter(&initialized)) {
  364. qemu_mutex_init(&page_mutex);
  365. qemu_cond_init(&page_cond);
  366. g_once_init_leave(&initialized, 1);
  367. }
  368. if (use_madv_populate_write) {
  369. /* Avoid creating a single thread for MADV_POPULATE_WRITE */
  370. if (context.num_threads == 1) {
  371. if (qemu_madvise(area, hpagesize * numpages,
  372. QEMU_MADV_POPULATE_WRITE)) {
  373. return -errno;
  374. }
  375. return 0;
  376. }
  377. touch_fn = do_madv_populate_write_pages;
  378. } else {
  379. touch_fn = do_touch_pages;
  380. }
  381. context.threads = g_new0(MemsetThread, context.num_threads);
  382. numpages_per_thread = numpages / context.num_threads;
  383. leftover = numpages % context.num_threads;
  384. for (i = 0; i < context.num_threads; i++) {
  385. context.threads[i].addr = addr;
  386. context.threads[i].numpages = numpages_per_thread + (i < leftover);
  387. context.threads[i].hpagesize = hpagesize;
  388. context.threads[i].context = &context;
  389. if (tc) {
  390. thread_context_create_thread(tc, &context.threads[i].pgthread,
  391. "touch_pages",
  392. touch_fn, &context.threads[i],
  393. QEMU_THREAD_JOINABLE);
  394. } else {
  395. qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
  396. touch_fn, &context.threads[i],
  397. QEMU_THREAD_JOINABLE);
  398. }
  399. addr += context.threads[i].numpages * hpagesize;
  400. }
  401. if (!use_madv_populate_write) {
  402. sigbus_memset_context = &context;
  403. }
  404. qemu_mutex_lock(&page_mutex);
  405. context.all_threads_created = true;
  406. qemu_cond_broadcast(&page_cond);
  407. qemu_mutex_unlock(&page_mutex);
  408. for (i = 0; i < context.num_threads; i++) {
  409. int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
  410. if (tmp) {
  411. ret = tmp;
  412. }
  413. }
  414. if (!use_madv_populate_write) {
  415. sigbus_memset_context = NULL;
  416. }
  417. g_free(context.threads);
  418. return ret;
  419. }
  420. static bool madv_populate_write_possible(char *area, size_t pagesize)
  421. {
  422. return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
  423. errno != EINVAL;
  424. }
  425. void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
  426. ThreadContext *tc, Error **errp)
  427. {
  428. static gsize initialized;
  429. int ret;
  430. size_t hpagesize = qemu_fd_getpagesize(fd);
  431. size_t numpages = DIV_ROUND_UP(sz, hpagesize);
  432. bool use_madv_populate_write;
  433. struct sigaction act;
  434. /*
  435. * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
  436. * some special mappings, such as mapping /dev/mem.
  437. */
  438. use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
  439. if (!use_madv_populate_write) {
  440. if (g_once_init_enter(&initialized)) {
  441. qemu_mutex_init(&sigbus_mutex);
  442. g_once_init_leave(&initialized, 1);
  443. }
  444. qemu_mutex_lock(&sigbus_mutex);
  445. memset(&act, 0, sizeof(act));
  446. #ifdef CONFIG_LINUX
  447. act.sa_sigaction = &sigbus_handler;
  448. act.sa_flags = SA_SIGINFO;
  449. #else /* CONFIG_LINUX */
  450. act.sa_handler = &sigbus_handler;
  451. act.sa_flags = 0;
  452. #endif /* CONFIG_LINUX */
  453. ret = sigaction(SIGBUS, &act, &sigbus_oldact);
  454. if (ret) {
  455. qemu_mutex_unlock(&sigbus_mutex);
  456. error_setg_errno(errp, errno,
  457. "qemu_prealloc_mem: failed to install signal handler");
  458. return;
  459. }
  460. }
  461. /* touch pages simultaneously */
  462. ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
  463. use_madv_populate_write);
  464. if (ret) {
  465. error_setg_errno(errp, -ret,
  466. "qemu_prealloc_mem: preallocating memory failed");
  467. }
  468. if (!use_madv_populate_write) {
  469. ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
  470. if (ret) {
  471. /* Terminate QEMU since it can't recover from error */
  472. perror("qemu_prealloc_mem: failed to reinstall signal handler");
  473. exit(1);
  474. }
  475. qemu_mutex_unlock(&sigbus_mutex);
  476. }
  477. }
  /*
   * Look up a name for process @pid.
   *
   * On FreeBSD this is a copy of ki_comm from kinfo_getproc(); elsewhere
   * it is the content of /proc/<pid>/cmdline as read by
   * g_file_get_contents().  Returns NULL when the lookup fails.
   */
  char *qemu_get_pid_name(pid_t pid)
  {
      char *name = NULL;

  #if defined(__FreeBSD__)
      /* BSDs don't have /proc, but they provide a nice substitute */
      struct kinfo_proc *kp = kinfo_getproc(pid);

      if (kp != NULL) {
          name = g_strdup(kp->ki_comm);
          free(kp);
      }
  #else
      /* Assume a system with reasonable procfs */
      size_t content_len;
      char *cmdline_path = g_strdup_printf("/proc/%d/cmdline", pid);

      g_file_get_contents(cmdline_path, &name, &content_len, NULL);
      g_free(cmdline_path);
  #endif

      return name;
  }
  498. void *qemu_alloc_stack(size_t *sz)
  499. {
  500. void *ptr, *guardpage;
  501. int flags;
  502. #ifdef CONFIG_DEBUG_STACK_USAGE
  503. void *ptr2;
  504. #endif
  505. size_t pagesz = qemu_real_host_page_size();
  506. #ifdef _SC_THREAD_STACK_MIN
  507. /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
  508. long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
  509. *sz = MAX(MAX(min_stack_sz, 0), *sz);
  510. #endif
  511. /* adjust stack size to a multiple of the page size */
  512. *sz = ROUND_UP(*sz, pagesz);
  513. /* allocate one extra page for the guard page */
  514. *sz += pagesz;
  515. flags = MAP_PRIVATE | MAP_ANONYMOUS;
  516. #if defined(MAP_STACK) && defined(__OpenBSD__)
  517. /* Only enable MAP_STACK on OpenBSD. Other OS's such as
  518. * Linux/FreeBSD/NetBSD have a flag with the same name
  519. * but have differing functionality. OpenBSD will SEGV
  520. * if it spots execution with a stack pointer pointing
  521. * at memory that was not allocated with MAP_STACK.
  522. */
  523. flags |= MAP_STACK;
  524. #endif
  525. ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
  526. if (ptr == MAP_FAILED) {
  527. perror("failed to allocate memory for stack");
  528. abort();
  529. }
  530. #if defined(HOST_IA64)
  531. /* separate register stack */
  532. guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
  533. #elif defined(HOST_HPPA)
  534. /* stack grows up */
  535. guardpage = ptr + *sz - pagesz;
  536. #else
  537. /* stack grows down */
  538. guardpage = ptr;
  539. #endif
  540. if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
  541. perror("failed to set up stack guard page");
  542. abort();
  543. }
  544. #ifdef CONFIG_DEBUG_STACK_USAGE
  545. for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
  546. *(uint32_t *)ptr2 = 0xdeadbeaf;
  547. }
  548. #endif
  549. return ptr;
  550. }
  #ifdef CONFIG_DEBUG_STACK_USAGE
  /* Largest stack usage reported so far by the current thread. */
  static __thread unsigned int max_stack_usage;
  #endif

  /*
   * Unmap the @sz bytes at @stack.
   *
   * With CONFIG_DEBUG_STACK_USAGE, first scan upwards from just past the
   * first page for the 0xdeadbeaf fill pattern to estimate how much of
   * the stack was used, and report when that exceeds the previous maximum
   * seen by this thread.
   */
  void qemu_free_stack(void *stack, size_t sz)
  {
  #ifdef CONFIG_DEBUG_STACK_USAGE
      void *cursor = stack + qemu_real_host_page_size();
      unsigned int used;

      /* Skip over words that still hold the untouched marker. */
      while (cursor < stack + sz && *(uint32_t *)cursor == 0xdeadbeaf) {
          cursor += sizeof(uint32_t);
      }

      used = sz - (uintptr_t) (cursor - stack);
      if (used > max_stack_usage) {
          error_report("thread %d max stack usage increased from %u to %u",
                       qemu_get_thread_id(), max_stack_usage, used);
          max_stack_usage = used;
      }
  #endif

      munmap(stack, sz);
  }
  574. /*
  575. * Disable CFI checks.
  576. * We are going to call a signal handler directly. Such handler may or may not
  577. * have been defined in our binary, so there's no guarantee that the pointer
  578. * used to set the handler is a cfi-valid pointer. Since the handlers are
  579. * stored in kernel memory, changing the handler to an attacker-defined
  580. * function requires being able to call a sigaction() syscall,
  581. * which is not as easy as overwriting a pointer in memory.
  582. */
  583. QEMU_DISABLE_CFI
  584. void sigaction_invoke(struct sigaction *action,
  585. struct qemu_signalfd_siginfo *info)
  586. {
  587. siginfo_t si = {};
  588. si.si_signo = info->ssi_signo;
  589. si.si_errno = info->ssi_errno;
  590. si.si_code = info->ssi_code;
  591. /* Convert the minimal set of fields defined by POSIX.
  592. * Positive si_code values are reserved for kernel-generated
  593. * signals, where the valid siginfo fields are determined by
  594. * the signal number. But according to POSIX, it is unspecified
  595. * whether SI_USER and SI_QUEUE have values less than or equal to
  596. * zero.
  597. */
  598. if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
  599. info->ssi_code <= 0) {
  600. /* SIGTERM, etc. */
  601. si.si_pid = info->ssi_pid;
  602. si.si_uid = info->ssi_uid;
  603. } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
  604. info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
  605. si.si_addr = (void *)(uintptr_t)info->ssi_addr;
  606. } else if (info->ssi_signo == SIGCHLD) {
  607. si.si_pid = info->ssi_pid;
  608. si.si_status = info->ssi_status;
  609. si.si_uid = info->ssi_uid;
  610. }
  611. action->sa_sigaction(info->ssi_signo, &si, NULL);
  612. }
  /*
   * Return the amount of host physical memory in bytes, computed as
   * sysconf(_SC_PHYS_PAGES) times the host page size and saturated at
   * SIZE_MAX.  Returns 0 when the page count is unavailable.
   */
  size_t qemu_get_host_physmem(void)
  {
  #ifdef _SC_PHYS_PAGES
      const long pages = sysconf(_SC_PHYS_PAGES);

      if (pages > 0) {
          const size_t pagesz = qemu_real_host_page_size();

          /* Saturate instead of letting the product wrap around. */
          if (pages > SIZE_MAX / pagesz) {
              return SIZE_MAX;
          }
          return pages * pagesz;
      }
  #endif

      return 0;
  }
  /*
   * Synchronously msync() the range [@addr, @addr + @length), widened to
   * whole host pages.  @fd is not used by this implementation.
   *
   * Returns the result of msync().
   */
  int qemu_msync(void *addr, size_t length, int fd)
  {
      const size_t offset_mask = qemu_real_host_page_size() - 1;
      const size_t align_mask = ~offset_mask;
      const uintptr_t start = (uintptr_t)addr;

      /*
       * The length itself has no strict requirements, but it has to grow
       * by however far the start address is moved down to its page
       * boundary, and is then rounded up to a multiple of the page size.
       */
      length += start & offset_mask;
      length = (length + offset_mask) & align_mask;

      addr = (void *)(start & align_mask);

      return msync(addr, length, MS_SYNC);
  }