oslib-posix.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /*
  2. * os-posix-lib.c
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2010 Red Hat, Inc.
  6. *
  7. * QEMU library functions on POSIX which are shared between QEMU and
  8. * the QEMU tools.
  9. *
  10. * Permission is hereby granted, free of charge, to any person obtaining a copy
  11. * of this software and associated documentation files (the "Software"), to deal
  12. * in the Software without restriction, including without limitation the rights
  13. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14. * copies of the Software, and to permit persons to whom the Software is
  15. * furnished to do so, subject to the following conditions:
  16. *
  17. * The above copyright notice and this permission notice shall be included in
  18. * all copies or substantial portions of the Software.
  19. *
  20. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26. * THE SOFTWARE.
  27. */
  28. /* The following block of code temporarily renames the daemon() function so the
  29. compiler does not see the warning associated with it in stdlib.h on OSX */
  30. #ifdef __APPLE__
  31. #define daemon qemu_fake_daemon_function
  32. #include <stdlib.h>
  33. #undef daemon
  34. extern int daemon(int, int);
  35. #endif
/*
 * QEMU_VMALLOC_ALIGN: alignment requested for the RAM mapping created by
 * qemu_anon_ram_alloc().  The value depends on the host OS/architecture.
 */
#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
/* Use 2 MiB alignment so transparent hugepages can be used by KVM.
   Valgrind does not support alignments larger than 1 MiB,
   therefore we need special code which handles running on Valgrind. */
# define QEMU_VMALLOC_ALIGN (512 * 4096)
#elif defined(__linux__) && defined(__s390x__)
/* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
# define QEMU_VMALLOC_ALIGN (256 * 4096)
#else
/* Everything else: align to the host page size.  Note that this expands
   to a function call, so it is not a compile-time constant here. */
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
  47. #include "qemu/osdep.h"
  48. #include <termios.h>
  49. #include <termios.h>
  50. #include <glib/gprintf.h>
  51. #include "sysemu/sysemu.h"
  52. #include "trace.h"
  53. #include "qemu/sockets.h"
  54. #include <sys/mman.h>
  55. #include <libgen.h>
  56. #include <setjmp.h>
  57. #include <sys/signal.h>
  58. #ifdef CONFIG_LINUX
  59. #include <sys/syscall.h>
  60. #endif
  61. #ifdef __FreeBSD__
  62. #include <sys/sysctl.h>
  63. #endif
  64. #include <qemu/mmap-alloc.h>
  65. int qemu_get_thread_id(void)
  66. {
  67. #if defined(__linux__)
  68. return syscall(SYS_gettid);
  69. #else
  70. return getpid();
  71. #endif
  72. }
  73. int qemu_daemon(int nochdir, int noclose)
  74. {
  75. return daemon(nochdir, noclose);
  76. }
  77. void *qemu_oom_check(void *ptr)
  78. {
  79. if (ptr == NULL) {
  80. fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
  81. abort();
  82. }
  83. return ptr;
  84. }
  85. void *qemu_try_memalign(size_t alignment, size_t size)
  86. {
  87. void *ptr;
  88. if (alignment < sizeof(void*)) {
  89. alignment = sizeof(void*);
  90. }
  91. #if defined(_POSIX_C_SOURCE) && !defined(__sun__)
  92. int ret;
  93. ret = posix_memalign(&ptr, alignment, size);
  94. if (ret != 0) {
  95. errno = ret;
  96. ptr = NULL;
  97. }
  98. #elif defined(CONFIG_BSD)
  99. ptr = valloc(size);
  100. #else
  101. ptr = memalign(alignment, size);
  102. #endif
  103. trace_qemu_memalign(alignment, size, ptr);
  104. return ptr;
  105. }
  106. void *qemu_memalign(size_t alignment, size_t size)
  107. {
  108. return qemu_oom_check(qemu_try_memalign(alignment, size));
  109. }
  110. /* alloc shared memory pages */
  111. void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
  112. {
  113. size_t align = QEMU_VMALLOC_ALIGN;
  114. void *ptr = qemu_ram_mmap(-1, size, align, false);
  115. if (ptr == MAP_FAILED) {
  116. return NULL;
  117. }
  118. if (alignment) {
  119. *alignment = align;
  120. }
  121. trace_qemu_anon_ram_alloc(size, ptr);
  122. return ptr;
  123. }
  124. void qemu_vfree(void *ptr)
  125. {
  126. trace_qemu_vfree(ptr);
  127. free(ptr);
  128. }
  129. void qemu_anon_ram_free(void *ptr, size_t size)
  130. {
  131. trace_qemu_anon_ram_free(ptr, size);
  132. qemu_ram_munmap(ptr, size);
  133. }
  134. void qemu_set_block(int fd)
  135. {
  136. int f;
  137. f = fcntl(fd, F_GETFL);
  138. fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
  139. }
  140. void qemu_set_nonblock(int fd)
  141. {
  142. int f;
  143. f = fcntl(fd, F_GETFL);
  144. fcntl(fd, F_SETFL, f | O_NONBLOCK);
  145. }
  146. int socket_set_fast_reuse(int fd)
  147. {
  148. int val = 1, ret;
  149. ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
  150. (const char *)&val, sizeof(val));
  151. assert(ret == 0);
  152. return ret;
  153. }
  154. void qemu_set_cloexec(int fd)
  155. {
  156. int f;
  157. f = fcntl(fd, F_GETFD);
  158. fcntl(fd, F_SETFD, f | FD_CLOEXEC);
  159. }
  160. /*
  161. * Creates a pipe with FD_CLOEXEC set on both file descriptors
  162. */
  163. int qemu_pipe(int pipefd[2])
  164. {
  165. int ret;
  166. #ifdef CONFIG_PIPE2
  167. ret = pipe2(pipefd, O_CLOEXEC);
  168. if (ret != -1 || errno != ENOSYS) {
  169. return ret;
  170. }
  171. #endif
  172. ret = pipe(pipefd);
  173. if (ret == 0) {
  174. qemu_set_cloexec(pipefd[0]);
  175. qemu_set_cloexec(pipefd[1]);
  176. }
  177. return ret;
  178. }
  179. int qemu_utimens(const char *path, const struct timespec *times)
  180. {
  181. struct timeval tv[2], tv_now;
  182. struct stat st;
  183. int i;
  184. #ifdef CONFIG_UTIMENSAT
  185. int ret;
  186. ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
  187. if (ret != -1 || errno != ENOSYS) {
  188. return ret;
  189. }
  190. #endif
  191. /* Fallback: use utimes() instead of utimensat() */
  192. /* happy if special cases */
  193. if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
  194. return 0;
  195. }
  196. if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
  197. return utimes(path, NULL);
  198. }
  199. /* prepare for hard cases */
  200. if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
  201. gettimeofday(&tv_now, NULL);
  202. }
  203. if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
  204. stat(path, &st);
  205. }
  206. for (i = 0; i < 2; i++) {
  207. if (times[i].tv_nsec == UTIME_NOW) {
  208. tv[i].tv_sec = tv_now.tv_sec;
  209. tv[i].tv_usec = tv_now.tv_usec;
  210. } else if (times[i].tv_nsec == UTIME_OMIT) {
  211. tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
  212. tv[i].tv_usec = 0;
  213. } else {
  214. tv[i].tv_sec = times[i].tv_sec;
  215. tv[i].tv_usec = times[i].tv_nsec / 1000;
  216. }
  217. }
  218. return utimes(path, &tv[0]);
  219. }
  220. char *
  221. qemu_get_local_state_pathname(const char *relative_pathname)
  222. {
  223. return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
  224. relative_pathname);
  225. }
  226. void qemu_set_tty_echo(int fd, bool echo)
  227. {
  228. struct termios tty;
  229. tcgetattr(fd, &tty);
  230. if (echo) {
  231. tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
  232. } else {
  233. tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
  234. }
  235. tcsetattr(fd, TCSANOW, &tty);
  236. }
  237. static char exec_dir[PATH_MAX];
  238. void qemu_init_exec_dir(const char *argv0)
  239. {
  240. char *dir;
  241. char *p = NULL;
  242. char buf[PATH_MAX];
  243. assert(!exec_dir[0]);
  244. #if defined(__linux__)
  245. {
  246. int len;
  247. len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
  248. if (len > 0) {
  249. buf[len] = 0;
  250. p = buf;
  251. }
  252. }
  253. #elif defined(__FreeBSD__)
  254. {
  255. static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
  256. size_t len = sizeof(buf) - 1;
  257. *buf = '\0';
  258. if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
  259. *buf) {
  260. buf[sizeof(buf) - 1] = '\0';
  261. p = buf;
  262. }
  263. }
  264. #endif
  265. /* If we don't have any way of figuring out the actual executable
  266. location then try argv[0]. */
  267. if (!p) {
  268. if (!argv0) {
  269. return;
  270. }
  271. p = realpath(argv0, buf);
  272. if (!p) {
  273. return;
  274. }
  275. }
  276. dir = dirname(p);
  277. pstrcpy(exec_dir, sizeof(exec_dir), dir);
  278. }
  279. char *qemu_get_exec_dir(void)
  280. {
  281. return g_strdup(exec_dir);
  282. }
  283. static sigjmp_buf sigjump;
  284. static void sigbus_handler(int signal)
  285. {
  286. siglongjmp(sigjump, 1);
  287. }
  288. void os_mem_prealloc(int fd, char *area, size_t memory)
  289. {
  290. int ret;
  291. struct sigaction act, oldact;
  292. sigset_t set, oldset;
  293. memset(&act, 0, sizeof(act));
  294. act.sa_handler = &sigbus_handler;
  295. act.sa_flags = 0;
  296. ret = sigaction(SIGBUS, &act, &oldact);
  297. if (ret) {
  298. perror("os_mem_prealloc: failed to install signal handler");
  299. exit(1);
  300. }
  301. /* unblock SIGBUS */
  302. sigemptyset(&set);
  303. sigaddset(&set, SIGBUS);
  304. pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
  305. if (sigsetjmp(sigjump, 1)) {
  306. fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
  307. "pages available to allocate guest RAM\n");
  308. exit(1);
  309. } else {
  310. int i;
  311. size_t hpagesize = qemu_fd_getpagesize(fd);
  312. size_t numpages = DIV_ROUND_UP(memory, hpagesize);
  313. /* MAP_POPULATE silently ignores failures */
  314. for (i = 0; i < numpages; i++) {
  315. memset(area + (hpagesize * i), 0, 1);
  316. }
  317. ret = sigaction(SIGBUS, &oldact, NULL);
  318. if (ret) {
  319. perror("os_mem_prealloc: failed to reinstall signal handler");
  320. exit(1);
  321. }
  322. pthread_sigmask(SIG_SETMASK, &oldset, NULL);
  323. }
  324. }
  325. static struct termios oldtty;
  326. static void term_exit(void)
  327. {
  328. tcsetattr(0, TCSANOW, &oldtty);
  329. }
  330. static void term_init(void)
  331. {
  332. struct termios tty;
  333. tcgetattr(0, &tty);
  334. oldtty = tty;
  335. tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
  336. |INLCR|IGNCR|ICRNL|IXON);
  337. tty.c_oflag |= OPOST;
  338. tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
  339. tty.c_cflag &= ~(CSIZE|PARENB);
  340. tty.c_cflag |= CS8;
  341. tty.c_cc[VMIN] = 1;
  342. tty.c_cc[VTIME] = 0;
  343. tcsetattr(0, TCSANOW, &tty);
  344. atexit(term_exit);
  345. }
  346. int qemu_read_password(char *buf, int buf_size)
  347. {
  348. uint8_t ch;
  349. int i, ret;
  350. printf("password: ");
  351. fflush(stdout);
  352. term_init();
  353. i = 0;
  354. for (;;) {
  355. ret = read(0, &ch, 1);
  356. if (ret == -1) {
  357. if (errno == EAGAIN || errno == EINTR) {
  358. continue;
  359. } else {
  360. break;
  361. }
  362. } else if (ret == 0) {
  363. ret = -1;
  364. break;
  365. } else {
  366. if (ch == '\r' ||
  367. ch == '\n') {
  368. ret = 0;
  369. break;
  370. }
  371. if (i < (buf_size - 1)) {
  372. buf[i++] = ch;
  373. }
  374. }
  375. }
  376. term_exit();
  377. buf[i] = '\0';
  378. printf("\n");
  379. return ret;
  380. }
  381. pid_t qemu_fork(Error **errp)
  382. {
  383. sigset_t oldmask, newmask;
  384. struct sigaction sig_action;
  385. int saved_errno;
  386. pid_t pid;
  387. /*
  388. * Need to block signals now, so that child process can safely
  389. * kill off caller's signal handlers without a race.
  390. */
  391. sigfillset(&newmask);
  392. if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
  393. error_setg_errno(errp, errno,
  394. "cannot block signals");
  395. return -1;
  396. }
  397. pid = fork();
  398. saved_errno = errno;
  399. if (pid < 0) {
  400. /* attempt to restore signal mask, but ignore failure, to
  401. * avoid obscuring the fork failure */
  402. (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
  403. error_setg_errno(errp, saved_errno,
  404. "cannot fork child process");
  405. errno = saved_errno;
  406. return -1;
  407. } else if (pid) {
  408. /* parent process */
  409. /* Restore our original signal mask now that the child is
  410. * safely running. Only documented failures are EFAULT (not
  411. * possible, since we are using just-grabbed mask) or EINVAL
  412. * (not possible, since we are using correct arguments). */
  413. (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
  414. } else {
  415. /* child process */
  416. size_t i;
  417. /* Clear out all signal handlers from parent so nothing
  418. * unexpected can happen in our child once we unblock
  419. * signals */
  420. sig_action.sa_handler = SIG_DFL;
  421. sig_action.sa_flags = 0;
  422. sigemptyset(&sig_action.sa_mask);
  423. for (i = 1; i < NSIG; i++) {
  424. /* Only possible errors are EFAULT or EINVAL The former
  425. * won't happen, the latter we expect, so no need to check
  426. * return value */
  427. (void)sigaction(i, &sig_action, NULL);
  428. }
  429. /* Unmask all signals in child, since we've no idea what the
  430. * caller's done with their signal mask and don't want to
  431. * propagate that to children */
  432. sigemptyset(&newmask);
  433. if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
  434. Error *local_err = NULL;
  435. error_setg_errno(&local_err, errno,
  436. "cannot unblock signals");
  437. error_report_err(local_err);
  438. _exit(1);
  439. }
  440. }
  441. return pid;
  442. }