2
0

osdep.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. /*
  2. * QEMU low level functions
  3. *
  4. * Copyright (c) 2003 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "qapi/error.h"
  26. #include "qemu/cutils.h"
  27. #include "qemu/sockets.h"
  28. #include "qemu/error-report.h"
  29. #include "qemu/madvise.h"
  30. #include "qemu/mprotect.h"
  31. #include "qemu/hw-version.h"
  32. #include "monitor/monitor.h"
  33. #ifdef CONFIG_DARWIN
  34. #include "tcg/tcg-apple-jit.h"
  35. #endif
  /*
   * Hardware version string handed out by qemu_hw_version().  It starts as
   * QEMU_HW_VERSION and can be replaced through qemu_set_hw_version().
   */
  36. static const char *hw_version = QEMU_HW_VERSION;
  /*
   * Set the TCP_CORK option of socket @fd to @v.
   *
   * Returns the setsockopt() result.  When SOL_TCP or TCP_CORK is not
   * available at build time nothing is done and 0 is returned.
   */
  int socket_set_cork(int fd, int v)
  {
      int rc = 0;

  #if defined(SOL_TCP) && defined(TCP_CORK)
      rc = setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
  #endif
      return rc;
  }
  /*
   * Enable TCP_NODELAY on socket @fd.
   *
   * Returns the setsockopt() result.
   */
  int socket_set_nodelay(int fd)
  {
      const int enable = 1;

      return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
  }
  50. int qemu_madvise(void *addr, size_t len, int advice)
  51. {
  52. if (advice == QEMU_MADV_INVALID) {
  53. errno = EINVAL;
  54. return -1;
  55. }
  56. #if defined(CONFIG_MADVISE)
  57. return madvise(addr, len, advice);
  58. #elif defined(CONFIG_POSIX_MADVISE)
  59. return posix_madvise(addr, len, advice);
  60. #else
  61. errno = EINVAL;
  62. return -1;
  63. #endif
  64. }
  /*
   * Change the protection of [@addr, @addr + @size) to @prot, which is a
   * host-specific value (VirtualProtect flags on Windows, mprotect() PROT_*
   * bits elsewhere).
   *
   * Both @addr and @size are asserted to be multiples of the real host page
   * size.  Returns 0 on success; on failure the error is reported and -1 is
   * returned.
   */
  static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
  {
      g_assert(((uintptr_t)addr & ~qemu_real_host_page_mask()) == 0);
      g_assert((size & ~qemu_real_host_page_mask()) == 0);

  #ifdef _WIN32
      DWORD old_protect;

      if (VirtualProtect(addr, size, prot, &old_protect)) {
          return 0;
      } else {
          g_autofree gchar *emsg = g_win32_error_message(GetLastError());

          error_report("%s: VirtualProtect failed: %s", __func__, emsg);
          return -1;
      }
  #else
      if (mprotect(addr, size, prot) == 0) {
          return 0;
      }

      error_report("%s: mprotect failed: %s", __func__, strerror(errno));
      return -1;
  #endif
  }
  /*
   * Make [@addr, @addr + @size) readable and writable.
   * Returns the result of qemu_mprotect__osdep().
   */
  int qemu_mprotect_rw(void *addr, size_t size)
  {
  #ifdef _WIN32
      const int prot = PAGE_READWRITE;
  #else
      const int prot = PROT_READ | PROT_WRITE;
  #endif

      return qemu_mprotect__osdep(addr, size, prot);
  }
  /*
   * Make [@addr, @addr + @size) readable, writable and executable.
   * Returns the result of qemu_mprotect__osdep().
   */
  int qemu_mprotect_rwx(void *addr, size_t size)
  {
  #ifdef _WIN32
      const int prot = PAGE_EXECUTE_READWRITE;
  #else
      const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
  #endif

      return qemu_mprotect__osdep(addr, size, prot);
  }
  /*
   * Remove all access to [@addr, @addr + @size).
   *
   * Returns the result of qemu_mprotect__osdep(), except on Apple arm64
   * hosts running macOS 11.2 or later, where the request is skipped and 0
   * is returned.
   */
  int qemu_mprotect_none(void *addr, size_t size)
  {
  #ifdef _WIN32
      const int prot = PAGE_NOACCESS;
  #else
      const int prot = PROT_NONE;

  # if defined(__APPLE__) && defined(__arm64__)
      if (__builtin_available(macOS 11.2, *)) {
          /* mprotect() in macOS 11.2 can't switch RWX to NONE */
          return 0;
      }
  # endif
  #endif

      return qemu_mprotect__osdep(addr, size, prot);
  }
  115. #ifndef _WIN32
  /*
   * fcntl() commands used to set and to query file locks.  Both start out
   * as -1 ("not probed yet") and are filled in by qemu_probe_lock_ops().
   */
  116. static int fcntl_op_setlk = -1;
  117. static int fcntl_op_getlk = -1;
  /*
   * Duplicate @fd and apply @flags to the duplicate.
   *
   * The O_SYNC setting cannot be changed after the fact, so a mismatch
   * between @flags and the duplicate's current flags fails with EINVAL.
   * The file is truncated in the cases where open() would have truncated
   * it (O_TRUNC, or O_CREAT together with O_EXCL).
   *
   * Returns the new descriptor, or -1 with errno set; on failure the
   * duplicate is closed again.
   */
  int qemu_dup_flags(int fd, int flags)
  {
      const int creat_excl = O_CREAT | O_EXCL;
      int newfd;
      int cur_flags;
      int saved_errno;

      newfd = qemu_dup(fd);
      if (newfd == -1) {
          return -1;
      }

      cur_flags = fcntl(newfd, F_GETFL);
      if (cur_flags == -1) {
          goto err_close;
      }

      if ((flags ^ cur_flags) & O_SYNC) {
          errno = EINVAL;
          goto err_close;
      }

      /* Set/unset flags that we can with fcntl */
      if (fcntl(newfd, F_SETFL, flags) == -1) {
          goto err_close;
      }

      /* Truncate the file in the cases that open() would truncate it */
      if ((flags & O_TRUNC) || (flags & creat_excl) == creat_excl) {
          if (ftruncate(newfd, 0) == -1) {
              goto err_close;
          }
      }

      return newfd;

  err_close:
      /* close() may clobber errno; keep the one that describes the failure */
      saved_errno = errno;
      close(newfd);
      errno = saved_errno;
      return -1;
  }
  /*
   * Duplicate @fd with close-on-exec set on the duplicate.
   *
   * Uses F_DUPFD_CLOEXEC when available; otherwise falls back to dup()
   * followed by qemu_set_cloexec().  Returns the new descriptor or -1.
   */
  int qemu_dup(int fd)
  {
  #ifdef F_DUPFD_CLOEXEC
      return fcntl(fd, F_DUPFD_CLOEXEC, 0);
  #else
      int newfd = dup(fd);

      if (newfd != -1) {
          qemu_set_cloexec(newfd);
      }
      return newfd;
  #endif
  }
  /*
   * Parse the fd set id in @param; this simply forwards to qemu_parse_fd()
   * and returns its result.
   */
  static int qemu_parse_fdset(const char *param)
  {
      int fdset_id = qemu_parse_fd(param);

      return fdset_id;
  }
  175. static void qemu_probe_lock_ops(void)
  176. {
  177. if (fcntl_op_setlk == -1) {
  178. #ifdef F_OFD_SETLK
  179. int fd;
  180. int ret;
  181. struct flock fl = {
  182. .l_whence = SEEK_SET,
  183. .l_start = 0,
  184. .l_len = 0,
  185. .l_type = F_WRLCK,
  186. };
  187. fd = open("/dev/null", O_RDWR);
  188. if (fd < 0) {
  189. fprintf(stderr,
  190. "Failed to open /dev/null for OFD lock probing: %s\n",
  191. strerror(errno));
  192. fcntl_op_setlk = F_SETLK;
  193. fcntl_op_getlk = F_GETLK;
  194. return;
  195. }
  196. ret = fcntl(fd, F_OFD_GETLK, &fl);
  197. close(fd);
  198. if (!ret) {
  199. fcntl_op_setlk = F_OFD_SETLK;
  200. fcntl_op_getlk = F_OFD_GETLK;
  201. } else {
  202. fcntl_op_setlk = F_SETLK;
  203. fcntl_op_getlk = F_GETLK;
  204. }
  205. #else
  206. fcntl_op_setlk = F_SETLK;
  207. fcntl_op_getlk = F_GETLK;
  208. #endif
  209. }
  210. }
  /*
   * Report whether the probed lock commands are the OFD ones.
   * Always false when F_OFD_SETLK is unknown at build time.
   */
  bool qemu_has_ofd_lock(void)
  {
      bool have_ofd = false;

      qemu_probe_lock_ops();
  #ifdef F_OFD_SETLK
      have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
  #endif
      return have_ofd;
  }
  220. static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
  221. {
  222. int ret;
  223. struct flock fl = {
  224. .l_whence = SEEK_SET,
  225. .l_start = start,
  226. .l_len = len,
  227. .l_type = fl_type,
  228. };
  229. qemu_probe_lock_ops();
  230. ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl));
  231. return ret == -1 ? -errno : 0;
  232. }
  /*
   * Lock @len bytes of @fd starting at @start: a write lock when
   * @exclusive is set, a read lock otherwise.
   *
   * Returns 0 on success, -errno on failure.
   */
  int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
  {
      int lock_type = F_RDLCK;

      if (exclusive) {
          lock_type = F_WRLCK;
      }
      return qemu_lock_fcntl(fd, start, len, lock_type);
  }
  /*
   * Release the lock on @len bytes of @fd starting at @start.
   *
   * Returns 0 on success, -errno on failure.
   */
  int qemu_unlock_fd(int fd, int64_t start, int64_t len)
  {
      const int lock_type = F_UNLCK;

      return qemu_lock_fcntl(fd, start, len, lock_type);
  }
  241. int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
  242. {
  243. int ret;
  244. struct flock fl = {
  245. .l_whence = SEEK_SET,
  246. .l_start = start,
  247. .l_len = len,
  248. .l_type = exclusive ? F_WRLCK : F_RDLCK,
  249. };
  250. qemu_probe_lock_ops();
  251. ret = fcntl(fd, fcntl_op_getlk, &fl);
  252. if (ret == -1) {
  253. return -errno;
  254. } else {
  255. return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
  256. }
  257. }
  258. #endif
  /*
   * open() @name with close-on-exec set on the resulting descriptor.
   *
   * O_CLOEXEC is passed to open() when the platform defines it; otherwise
   * qemu_set_cloexec() is applied after a successful open().
   * Returns the open() result.
   */
  static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
  {
  #ifdef O_CLOEXEC
      return open(name, flags | O_CLOEXEC, mode);
  #else
      int fd = open(name, flags, mode);

      if (fd >= 0) {
          qemu_set_cloexec(fd);
      }
      return fd;
  #endif
  }
  272. /*
  273. * Opens a file with FD_CLOEXEC set
  274. */
  275. static int
  276. qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
  277. {
  278. int ret;
  279. #ifndef _WIN32
  280. const char *fdset_id_str;
  281. /* Attempt dup of fd from fd set */
  282. if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
  283. int64_t fdset_id;
  284. int dupfd;
  285. fdset_id = qemu_parse_fdset(fdset_id_str);
  286. if (fdset_id == -1) {
  287. error_setg(errp, "Could not parse fdset %s", name);
  288. errno = EINVAL;
  289. return -1;
  290. }
  291. dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
  292. if (dupfd == -1) {
  293. error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
  294. name, flags);
  295. return -1;
  296. }
  297. return dupfd;
  298. }
  299. #endif
  300. ret = qemu_open_cloexec(name, flags, mode);
  301. if (ret == -1) {
  302. const char *action = flags & O_CREAT ? "create" : "open";
  303. #ifdef O_DIRECT
  304. /* Give more helpful error message for O_DIRECT */
  305. if (errno == EINVAL && (flags & O_DIRECT)) {
  306. ret = open(name, flags & ~O_DIRECT, mode);
  307. if (ret != -1) {
  308. close(ret);
  309. error_setg(errp, "Could not %s '%s': "
  310. "filesystem does not support O_DIRECT",
  311. action, name);
  312. errno = EINVAL; /* restore first open()'s errno */
  313. return -1;
  314. }
  315. }
  316. #endif /* O_DIRECT */
  317. error_setg_errno(errp, errno, "Could not %s '%s'",
  318. action, name);
  319. }
  320. return ret;
  321. }
  322. int qemu_open(const char *name, int flags, Error **errp)
  323. {
  324. assert(!(flags & O_CREAT));
  325. return qemu_open_internal(name, flags, 0, errp);
  326. }
  327. int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
  328. {
  329. assert(!(flags & O_CREAT));
  330. return qemu_open_internal(name, flags | O_CREAT, mode, errp);
  331. }
  /*
   * open()-style entry point: a mode argument is read from the variadic
   * arguments only when @flags contains O_CREAT.
   *
   * No Error object is produced.  When the open fails with EINVAL and
   * O_DIRECT was requested, a hint is printed with error_report().
   * Returns the descriptor or -1 with errno set.
   */
  int qemu_open_old(const char *name, int flags, ...)
  {
      mode_t mode = 0;
      va_list args;
      int fd;

      va_start(args, flags);
      if (flags & O_CREAT) {
          mode = va_arg(args, int);
      }
      va_end(args);

      fd = qemu_open_internal(name, flags, mode, NULL);
      if (fd != -1) {
          return fd;
      }

  #ifdef O_DIRECT
      if (errno == EINVAL && (flags & O_DIRECT)) {
          error_report("file system may not support O_DIRECT");
          errno = EINVAL; /* in case it was clobbered */
      }
  #endif /* O_DIRECT */

      return fd;
  }
  /*
   * Close @fd.  If @fd is a duplicate that came from an fd set, it is also
   * removed from the set's bookkeeping, but only when close() succeeded.
   *
   * Returns the close() result.
   */
  int qemu_close(int fd)
  {
      /* Look the fd up before closing it. */
      const bool from_fdset = monitor_fdset_dup_fd_find(fd) != -1;
      int rc = close(fd);

      if (from_fdset && rc == 0) {
          monitor_fdset_dup_fd_remove(fd);
      }
      return rc;
  }
  /*
   * Remove @name from the filesystem.  Names that start with /dev/fdset/
   * are left alone and reported as success.
   *
   * Returns: zero on success.  On error, -1 is returned and errno is set
   * by unlink().
   */
  int qemu_unlink(const char *name)
  {
      const bool is_fdset = g_str_has_prefix(name, "/dev/fdset/");

      return is_fdset ? 0 : unlink(name);
  }
  /*
   * A variant of write(2) which handles partial writes.
   *
   * @fd:    descriptor to write to
   * @buf:   data to write
   * @count: number of bytes to write
   *
   * Returns the number of bytes transferred.  If that is fewer than
   * `count', errno holds the error of the write() call that failed.
   * A write() interrupted by a signal (EINTR) is retried.
   *
   * This function does not work with non-blocking fds.
   * Any of the possibilities with non-blocking fds is bad:
   * - return a short write (then the name is wrong)
   * - busy wait by adding (errno == EAGAIN) to the loop
   */
  ssize_t qemu_write_full(int fd, const void *buf, size_t count)
  {
      /* Byte cursor: arithmetic on void pointers is not ISO C. */
      const char *cursor = buf;
      ssize_t total = 0;

      while (count) {
          ssize_t ret = write(fd, cursor, count);

          if (ret < 0) {
              if (errno == EINTR) {
                  continue;
              }
              break;
          }

          count -= ret;
          cursor += ret;
          total += ret;
      }

      return total;
  }
  /*
   * Create a socket whose descriptor has FD_CLOEXEC set.
   *
   * SOCK_CLOEXEC is tried first when the platform defines it.  Only if that
   * attempt fails with EINVAL is the socket created without the flag and
   * close-on-exec applied afterwards with qemu_set_cloexec().
   * Returns the socket() result.
   */
  int qemu_socket(int domain, int type, int protocol)
  {
      int fd;

  #ifdef SOCK_CLOEXEC
      fd = socket(domain, type | SOCK_CLOEXEC, protocol);
      if (!(fd == -1 && errno == EINVAL)) {
          return fd;
      }
  #endif

      fd = socket(domain, type, protocol);
      if (fd < 0) {
          return fd;
      }
      qemu_set_cloexec(fd);
      return fd;
  }
  /*
   * Accept a connection on @s and set FD_CLOEXEC on the new descriptor.
   *
   * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only an
   * ENOSYS failure falls through to plain accept() followed by
   * qemu_set_cloexec().  Returns the accepted descriptor or -1.
   */
  int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
  {
      int fd;

  #ifdef CONFIG_ACCEPT4
      fd = accept4(s, addr, addrlen, SOCK_CLOEXEC);
      if (!(fd == -1 && errno == ENOSYS)) {
          return fd;
      }
  #endif

      fd = accept(s, addr, addrlen);
      if (fd < 0) {
          return fd;
      }
      qemu_set_cloexec(fd);
      return fd;
  }
  /*
   * A variant of send(2) which handles partial sends.
   *
   * @s:     socket to send on
   * @buf:   data to send
   * @count: number of bytes to send
   *
   * send() is called with flags 0 until all of @buf has been transferred
   * or a call fails; a call interrupted by a signal (EINTR) is retried.
   *
   * Returns the number of bytes transferred.  If that is fewer than
   * `count', errno holds the error of the send() call that failed.
   */
  ssize_t qemu_send_full(int s, const void *buf, size_t count)
  {
      /* Byte cursor: arithmetic on void pointers is not ISO C. */
      const char *cursor = buf;
      ssize_t total = 0;

      while (count) {
          ssize_t ret = send(s, cursor, count, 0);

          if (ret < 0) {
              if (errno == EINTR) {
                  continue;
              }
              break;
          }

          count -= ret;
          cursor += ret;
          total += ret;
      }

      return total;
  }
  461. void qemu_set_hw_version(const char *version)
  462. {
  463. hw_version = version;
  464. }
  465. const char *qemu_hw_version(void)
  466. {
  467. return hw_version;
  468. }
  #ifdef _WIN32
  /* Exit handler registered by socket_init(): shuts Winsock down. */
  static void socket_cleanup(void)
  {
      (void)WSACleanup();
  }
  #endif
  /*
   * One-time socket layer initialisation.
   *
   * On Windows this starts Winsock 2.2 and registers socket_cleanup() to
   * run at exit; on other hosts there is nothing to do.
   *
   * Returns 0 on success, -1 if Winsock could not be started (the error
   * code is printed to stderr).
   */
  int socket_init(void)
  {
  #ifdef _WIN32
      WSADATA data;
      int err;

      /*
       * WSAStartup() returns its error code directly.  WSAGetLastError()
       * must not be consulted here: Winsock is not initialised when the
       * call fails, so it would not report the reason for the failure.
       */
      err = WSAStartup(MAKEWORD(2, 2), &data);
      if (err != 0) {
          fprintf(stderr, "WSAStartup: %d\n", err);
          return -1;
      }
      atexit(socket_cleanup);
  #endif
      return 0;
  }
  490. #ifndef CONFIG_IOVEC
  /*
   * Emulate readv()/writev() with one read()/write() call per chunk.
   *
   * @fd:       descriptor to operate on
   * @iov:      array of buffers
   * @iov_cnt:  number of entries in @iov; a count <= 0 transfers nothing
   * @do_write: true to write the buffers out, false to read into them
   *
   * Each buffer is processed until it has been transferred completely
   * before moving on to the next one.  Zero-length buffers are skipped:
   * a zero-byte read()/write() returns 0, which must not be mistaken for
   * end-of-file, otherwise all following buffers would be dropped.
   *
   * Returns the total number of bytes transferred.  Processing stops early
   * when a call returns 0 or fails with anything but EINTR; in the failure
   * case -1 is returned only if nothing had been transferred before.
   */
  static ssize_t
  readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
  {
      int i = 0;
      ssize_t ret = 0;
      size_t off = 0;

      while (i < iov_cnt) {
          char *base = iov[i].iov_base;
          size_t len = iov[i].iov_len;
          ssize_t r;

          if (len == 0) {
              /* Nothing to transfer; off is already 0 here. */
              i++;
              continue;
          }

          r = do_write
              ? write(fd, base + off, len - off)
              : read(fd, base + off, len - off);
          if (r > 0) {
              ret += r;
              off += r;
              if (off < len) {
                  /* Short transfer: keep working on the same buffer. */
                  continue;
              }
          } else if (r == 0) {
              break;
          } else if (errno == EINTR) {
              continue;
          } else {
              /*
               * Some "other" error: only report it if there was no data
               * processed.
               */
              if (ret == 0) {
                  ret = -1;
              }
              break;
          }

          off = 0;
          i++;
      }

      return ret;
  }
  /*
   * readv() replacement for hosts without CONFIG_IOVEC: fills the
   * @iov_cnt buffers of @iov from @fd via readv_writev().
   */
  ssize_t
  readv(int fd, const struct iovec *iov, int iov_cnt)
  {
      const bool do_write = false;

      return readv_writev(fd, iov, iov_cnt, do_write);
  }
  /*
   * writev() replacement for hosts without CONFIG_IOVEC: writes the
   * @iov_cnt buffers of @iov to @fd via readv_writev().
   */
  ssize_t
  writev(int fd, const struct iovec *iov, int iov_cnt)
  {
      const bool do_write = true;

      return readv_writev(fd, iov, iov_cnt, do_write);
  }
  534. #endif
  /*
   * Flush the data of @fd to disk, preferably without forcing out the
   * inode just because of timestamp updates.
   *
   * fdatasync() is used when the build has CONFIG_FDATASYNC; many operating
   * systems do not provide it, in which case fsync() is used instead.
   * Returns the result of whichever call was made.
   */
  int qemu_fdatasync(int fd)
  {
      int rc;

  #ifdef CONFIG_FDATASYNC
      rc = fdatasync(fd);
  #else
      rc = fsync(fd);
  #endif
      return rc;
  }