osdep.c 14 KB


  1. /*
  2. * QEMU low level functions
  3. *
  4. * Copyright (c) 2003 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "qapi/error.h"
  26. #include "qemu/cutils.h"
  27. #include "qemu/sockets.h"
  28. #include "qemu/error-report.h"
  29. #include "qemu/madvise.h"
  30. #include "qemu/mprotect.h"
  31. #include "qemu/hw-version.h"
  32. #include "monitor/monitor.h"
  /*
   * Hardware version string handed out by qemu_hw_version().  It starts as
   * QEMU_HW_VERSION and can be replaced through qemu_set_hw_version().
   */
  33. static const char *hw_version = QEMU_HW_VERSION;
  /*
   * Set the TCP_CORK option of socket @fd to @v.
   *
   * Returns the setsockopt() result when both SOL_TCP and TCP_CORK are
   * defined at build time; otherwise nothing is done and 0 is returned.
   */
  int socket_set_cork(int fd, int v)
  {
      int rc = 0;

  #if defined(SOL_TCP) && defined(TCP_CORK)
      rc = setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
  #endif
      return rc;
  }
  /*
   * Turn on TCP_NODELAY for socket @fd.
   *
   * Returns whatever setsockopt() returns.
   */
  int socket_set_nodelay(int fd)
  {
      int enable = 1;

      return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
  }
  47. int qemu_madvise(void *addr, size_t len, int advice)
  48. {
  49. if (advice == QEMU_MADV_INVALID) {
  50. errno = EINVAL;
  51. return -1;
  52. }
  53. #if defined(CONFIG_MADVISE)
  54. return madvise(addr, len, advice);
  55. #elif defined(CONFIG_POSIX_MADVISE)
  56. int rc = posix_madvise(addr, len, advice);
  57. if (rc) {
  58. errno = rc;
  59. return -1;
  60. }
  61. return 0;
  62. #else
  63. errno = ENOSYS;
  64. return -1;
  65. #endif
  66. }
  /*
   * Change the protection of [@addr, @addr + @size) to @prot, which is a
   * PAGE_* value on Windows and a PROT_* mask elsewhere.
   *
   * Both @addr and @size are asserted to have no bits set outside
   * qemu_real_host_page_mask().
   *
   * Returns 0 on success; on failure an error is reported and -1 returned.
   */
  static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
  {
      g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask()));
      g_assert(!(size & ~qemu_real_host_page_mask()));

  #ifdef _WIN32
      DWORD previous;

      if (VirtualProtect(addr, size, prot, &previous)) {
          return 0;
      }

      g_autofree gchar *emsg = g_win32_error_message(GetLastError());
      error_report("%s: VirtualProtect failed: %s", __func__, emsg);
      return -1;
  #else
      if (mprotect(addr, size, prot) == 0) {
          return 0;
      }

      error_report("%s: mprotect failed: %s", __func__, strerror(errno));
      return -1;
  #endif
  }
  /*
   * Make [@addr, @addr + @size) readable and writable.
   * Returns the result of qemu_mprotect__osdep().
   */
  int qemu_mprotect_rw(void *addr, size_t size)
  {
  #ifdef _WIN32
      const int prot = PAGE_READWRITE;
  #else
      const int prot = PROT_READ | PROT_WRITE;
  #endif

      return qemu_mprotect__osdep(addr, size, prot);
  }
  /*
   * Make [@addr, @addr + @size) readable, writable and executable.
   * Returns the result of qemu_mprotect__osdep().
   */
  int qemu_mprotect_rwx(void *addr, size_t size)
  {
  #ifdef _WIN32
      const int prot = PAGE_EXECUTE_READWRITE;
  #else
      const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
  #endif

      return qemu_mprotect__osdep(addr, size, prot);
  }
  /*
   * Remove all access rights from [@addr, @addr + @size).
   * Returns the result of qemu_mprotect__osdep().
   */
  int qemu_mprotect_none(void *addr, size_t size)
  {
  #ifdef _WIN32
      const int prot = PAGE_NOACCESS;
  #else
      const int prot = PROT_NONE;
  #endif

      return qemu_mprotect__osdep(addr, size, prot);
  }
  111. #ifndef _WIN32
  /*
   * fcntl() commands used to set and to query file locks.  -1 means "not
   * probed yet"; qemu_probe_lock_ops() replaces both with either the F_OFD_*
   * commands or the plain F_SETLK / F_GETLK ones.
   */
  112. static int fcntl_op_setlk = -1;
  113. static int fcntl_op_getlk = -1;
  /*
   * Duplicate @fd and apply @flags to the duplicate.
   *
   * The duplicate must already agree with @flags on O_SYNC, otherwise the
   * call fails with EINVAL.  The remaining flags are applied with F_SETFL,
   * and the file is truncated in the cases where open() would have
   * truncated it (O_TRUNC, or O_CREAT together with O_EXCL).
   *
   * Returns the new descriptor, or -1 with errno set; on failure the
   * duplicate is closed again.
   */
  int qemu_dup_flags(int fd, int flags)
  {
      const int creat_excl = O_CREAT | O_EXCL;
      int saved_errno;
      int cur_flags;
      int newfd;

      newfd = qemu_dup(fd);
      if (newfd == -1) {
          return -1;
      }

      cur_flags = fcntl(newfd, F_GETFL);
      if (cur_flags == -1) {
          goto err_close;
      }

      if ((flags ^ cur_flags) & O_SYNC) {
          errno = EINVAL;
          goto err_close;
      }

      /* Set/unset flags that we can with fcntl */
      if (fcntl(newfd, F_SETFL, flags) == -1) {
          goto err_close;
      }

      /* Truncate the file in the cases that open() would truncate it */
      if (((flags & O_TRUNC) || (flags & creat_excl) == creat_excl) &&
          ftruncate(newfd, 0) == -1) {
          goto err_close;
      }

      return newfd;

  err_close:
      /* close() must not clobber the errno that describes the failure */
      saved_errno = errno;
      close(newfd);
      errno = saved_errno;
      return -1;
  }
  /*
   * Duplicate @fd with close-on-exec set on the copy.
   *
   * Uses F_DUPFD_CLOEXEC when available, otherwise dup() followed by
   * qemu_set_cloexec().  Returns the new descriptor or -1.
   */
  int qemu_dup(int fd)
  {
  #ifdef F_DUPFD_CLOEXEC
      return fcntl(fd, F_DUPFD_CLOEXEC, 0);
  #else
      int copy = dup(fd);

      if (copy != -1) {
          qemu_set_cloexec(copy);
      }
      return copy;
  #endif
  }
  /*
   * Parse the fd-set id that follows "/dev/fdset/" in a file name.
   * Simply forwards to qemu_parse_fd() and returns its result.
   */
  static int qemu_parse_fdset(const char *param)
  {
      int fdset_id = qemu_parse_fd(param);

      return fdset_id;
  }
  171. static void qemu_probe_lock_ops(void)
  172. {
  173. if (fcntl_op_setlk == -1) {
  174. #ifdef F_OFD_SETLK
  175. int fd;
  176. int ret;
  177. struct flock fl = {
  178. .l_whence = SEEK_SET,
  179. .l_start = 0,
  180. .l_len = 0,
  181. .l_type = F_WRLCK,
  182. };
  183. fd = open("/dev/null", O_RDWR);
  184. if (fd < 0) {
  185. fprintf(stderr,
  186. "Failed to open /dev/null for OFD lock probing: %s\n",
  187. strerror(errno));
  188. fcntl_op_setlk = F_SETLK;
  189. fcntl_op_getlk = F_GETLK;
  190. return;
  191. }
  192. ret = fcntl(fd, F_OFD_GETLK, &fl);
  193. close(fd);
  194. if (!ret) {
  195. fcntl_op_setlk = F_OFD_SETLK;
  196. fcntl_op_getlk = F_OFD_GETLK;
  197. } else {
  198. fcntl_op_setlk = F_SETLK;
  199. fcntl_op_getlk = F_GETLK;
  200. }
  201. #else
  202. fcntl_op_setlk = F_SETLK;
  203. fcntl_op_getlk = F_GETLK;
  204. #endif
  205. }
  206. }
  /*
   * Report whether the probed lock command is F_OFD_SETLK.
   * Always false when F_OFD_SETLK is not defined at build time.
   */
  bool qemu_has_ofd_lock(void)
  {
      bool have_ofd = false;

      qemu_probe_lock_ops();
  #ifdef F_OFD_SETLK
      have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
  #endif
      return have_ofd;
  }
  216. static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
  217. {
  218. int ret;
  219. struct flock fl = {
  220. .l_whence = SEEK_SET,
  221. .l_start = start,
  222. .l_len = len,
  223. .l_type = fl_type,
  224. };
  225. qemu_probe_lock_ops();
  226. ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl));
  227. return ret == -1 ? -errno : 0;
  228. }
  /*
   * Lock the byte range [@start, @start + @len) of @fd: a write lock when
   * @exclusive is true, a read lock otherwise.
   *
   * Returns 0 on success, -errno on failure.
   */
  int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
  {
      int fl_type = F_RDLCK;

      if (exclusive) {
          fl_type = F_WRLCK;
      }
      return qemu_lock_fcntl(fd, start, len, fl_type);
  }
  /*
   * Release the lock on the byte range [@start, @start + @len) of @fd.
   *
   * Returns 0 on success, -errno on failure.
   */
  int qemu_unlock_fd(int fd, int64_t start, int64_t len)
  {
      const int fl_type = F_UNLCK;

      return qemu_lock_fcntl(fd, start, len, fl_type);
  }
  237. int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
  238. {
  239. int ret;
  240. struct flock fl = {
  241. .l_whence = SEEK_SET,
  242. .l_start = start,
  243. .l_len = len,
  244. .l_type = exclusive ? F_WRLCK : F_RDLCK,
  245. };
  246. qemu_probe_lock_ops();
  247. ret = fcntl(fd, fcntl_op_getlk, &fl);
  248. if (ret == -1) {
  249. return -errno;
  250. } else {
  251. return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
  252. }
  253. }
  254. #endif
  /*
   * Report whether O_DIRECT was defined when this file was built.
   */
  bool qemu_has_direct_io(void)
  {
      bool supported = false;

  #ifdef O_DIRECT
      supported = true;
  #endif
      return supported;
  }
  /*
   * open() @name with close-on-exec requested.
   *
   * O_CLOEXEC is added to @flags when it is defined; otherwise the file is
   * opened normally and qemu_set_cloexec() is applied to the result.
   * Returns the open() result.
   */
  static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
  {
  #ifdef O_CLOEXEC
      return open(name, flags | O_CLOEXEC, mode);
  #else
      int fd = open(name, flags, mode);

      if (fd >= 0) {
          qemu_set_cloexec(fd);
      }
      return fd;
  #endif
  }
  276. /*
  277. * Opens a file with FD_CLOEXEC set
  278. */
  279. static int
  280. qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
  281. {
  282. int ret;
  283. #ifndef _WIN32
  284. const char *fdset_id_str;
  285. /* Attempt dup of fd from fd set */
  286. if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
  287. int64_t fdset_id;
  288. fdset_id = qemu_parse_fdset(fdset_id_str);
  289. if (fdset_id == -1) {
  290. error_setg(errp, "Could not parse fdset %s", name);
  291. errno = EINVAL;
  292. return -1;
  293. }
  294. return monitor_fdset_dup_fd_add(fdset_id, flags, errp);
  295. }
  296. #endif
  297. ret = qemu_open_cloexec(name, flags, mode);
  298. if (ret == -1) {
  299. const char *action = flags & O_CREAT ? "create" : "open";
  300. #ifdef O_DIRECT
  301. /* Give more helpful error message for O_DIRECT */
  302. if (errno == EINVAL && (flags & O_DIRECT)) {
  303. ret = open(name, flags & ~O_DIRECT, mode);
  304. if (ret != -1) {
  305. close(ret);
  306. error_setg(errp, "Could not %s '%s': "
  307. "filesystem does not support O_DIRECT",
  308. action, name);
  309. errno = EINVAL; /* restore first open()'s errno */
  310. return -1;
  311. }
  312. }
  313. #endif /* O_DIRECT */
  314. error_setg_errno(errp, errno, "Could not %s '%s'",
  315. action, name);
  316. }
  317. return ret;
  318. }
  319. int qemu_open(const char *name, int flags, Error **errp)
  320. {
  321. assert(!(flags & O_CREAT));
  322. return qemu_open_internal(name, flags, 0, errp);
  323. }
  324. int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
  325. {
  326. assert(!(flags & O_CREAT));
  327. return qemu_open_internal(name, flags | O_CREAT, mode, errp);
  328. }
  /*
   * open()-style entry point with an optional mode argument.
   *
   * The variadic mode is read only when @flags contains O_CREAT.  No Error
   * object is used; when the open fails with EINVAL while O_DIRECT was
   * requested, a hint is printed with error_report().
   *
   * Returns the result of qemu_open_internal().
   */
  int qemu_open_old(const char *name, int flags, ...)
  {
      mode_t mode = 0;
      int fd;

      if (flags & O_CREAT) {
          va_list ap;

          va_start(ap, flags);
          mode = va_arg(ap, int);
          va_end(ap);
      }

      fd = qemu_open_internal(name, flags, mode, NULL);

  #ifdef O_DIRECT
      if (fd == -1 && (flags & O_DIRECT) && errno == EINVAL) {
          error_report("file system may not support O_DIRECT");
          errno = EINVAL; /* in case it was clobbered */
      }
  #endif /* O_DIRECT */

      return fd;
  }
  /*
   * Close @fd.
   *
   * monitor_fdset_dup_fd_remove() is called first so that an fd that was
   * dup'd from an fdset is dropped from it.  Returns the close() result.
   */
  int qemu_close(int fd)
  {
      int rc;

      /* Close fd that was dup'd from an fdset */
      monitor_fdset_dup_fd_remove(fd);
      rc = close(fd);
      return rc;
  }
  /*
   * Remove @name from the filesystem, except for names that begin with
   * "/dev/fdset/", which are left alone and reported as success.
   *
   * Returns: zero on success; -1 with errno set when unlink() fails.
   */
  int qemu_unlink(const char *name)
  {
      return g_str_has_prefix(name, "/dev/fdset/") ? 0 : unlink(name);
  }
  /*
   * write(2) variant that keeps going after partial writes.
   *
   * Returns the number of bytes transferred; errno is set when that is
   * fewer than @count.  Interrupted writes (EINTR) are retried.
   *
   * This function does not work with non-blocking fds, for which every
   * option is bad:
   * - returning a short write (then the name is wrong)
   * - busy waiting by adding (errno == EAGAIN) to the loop
   */
  ssize_t qemu_write_full(int fd, const void *buf, size_t count)
  {
      const char *cursor = buf;
      size_t remaining = count;
      ssize_t written = 0;

      while (remaining > 0) {
          ssize_t n = write(fd, cursor, remaining);

          if (n >= 0) {
              cursor += n;
              remaining -= n;
              written += n;
          } else if (errno != EINTR) {
              break;
          }
      }

      return written;
  }
  /*
   * Create a socket with FD_CLOEXEC set.
   *
   * When SOCK_CLOEXEC is defined it is tried first; only an EINVAL failure
   * falls through to a plain socket() call followed by qemu_set_cloexec().
   * Returns the socket() result.
   */
  int qemu_socket(int domain, int type, int protocol)
  {
      int fd;

  #ifdef SOCK_CLOEXEC
      fd = socket(domain, type | SOCK_CLOEXEC, protocol);
      if (!(fd == -1 && errno == EINVAL)) {
          return fd;
      }
  #endif

      fd = socket(domain, type, protocol);
      if (fd >= 0) {
          qemu_set_cloexec(fd);
      }
      return fd;
  }
  /*
   * Accept a connection on @s and set FD_CLOEXEC on the new descriptor.
   *
   * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only an
   * ENOSYS failure falls through to accept() plus qemu_set_cloexec().
   * Returns the accept4()/accept() result.
   */
  int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
  {
      int fd;

  #ifdef CONFIG_ACCEPT4
      fd = accept4(s, addr, addrlen, SOCK_CLOEXEC);
      if (!(fd == -1 && errno == ENOSYS)) {
          return fd;
      }
  #endif

      fd = accept(s, addr, addrlen);
      if (fd >= 0) {
          qemu_set_cloexec(fd);
      }
      return fd;
  }
  /*
   * send(2) variant that keeps going after partial sends.
   *
   * Calls send() with flags 0 until @count bytes went out or an error other
   * than EINTR occurs.  Returns the number of bytes sent, which is less
   * than @count when the loop stopped on an error.
   */
  ssize_t qemu_send_full(int s, const void *buf, size_t count)
  {
      const char *cursor = buf;
      size_t remaining = count;
      ssize_t sent = 0;

      while (remaining > 0) {
          ssize_t n = send(s, cursor, remaining, 0);

          if (n >= 0) {
              cursor += n;
              remaining -= n;
              sent += n;
          } else if (errno != EINTR) {
              break;
          }
      }

      return sent;
  }
  449. void qemu_set_hw_version(const char *version)
  450. {
  451. hw_version = version;
  452. }
  453. const char *qemu_hw_version(void)
  454. {
  455. return hw_version;
  456. }
  #ifdef _WIN32
  /*
   * Windows only: shut Winsock down again.  socket_init() registers this
   * function with atexit().
   */
  static void socket_cleanup(void)
  {
      (void)WSACleanup();
  }
  #endif
  /*
   * Initialise the host socket layer.
   *
   * On Windows this starts Winsock 2.2 and registers socket_cleanup() with
   * atexit(); on other hosts there is nothing to do.
   *
   * Returns 0 on success, -1 when WSAStartup() fails (the error code is
   * printed to stderr).
   */
  int socket_init(void)
  {
  #ifdef _WIN32
      WSADATA Data;
      int ret;

      /*
       * WSAStartup() returns its error code directly.  WSAGetLastError() must
       * not be used here: when startup fails Winsock is not initialised, so
       * the value it would return is not the startup error.
       */
      ret = WSAStartup(MAKEWORD(2, 2), &Data);
      if (ret != 0) {
          fprintf(stderr, "WSAStartup: %d\n", ret);
          return -1;
      }
      atexit(socket_cleanup);
  #endif
      return 0;
  }
  478. #ifndef CONFIG_IOVEC
  /*
   * Fallback used to emulate readv()/writev() with plain read()/write().
   *
   * Walks the @iov_cnt elements of @iov in order and transfers each one
   * completely (repeating short transfers) before moving on; writes when
   * @do_write is true, reads otherwise.  Zero-length elements are skipped.
   *
   * The walk stops early when read()/write() returns 0 for a non-empty
   * request or fails with an error other than EINTR.
   *
   * Returns the total number of bytes transferred.  Returns -1 with errno
   * set when an error occurs before any byte was transferred, or when
   * @iov_cnt is negative (EINVAL).
   */
  static ssize_t
  readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
  {
      ssize_t total = 0;
      size_t off = 0;
      int i = 0;

      /* A negative count must not be turned into a huge unsigned bound */
      if (iov_cnt < 0) {
          errno = EINVAL;
          return -1;
      }

      while (i < iov_cnt) {
          size_t todo = iov[i].iov_len - off;
          char *base;
          ssize_t r;

          if (todo == 0) {
              /*
               * Element is empty or fully transferred.  Never issue a
               * zero-byte read()/write(): its 0 result would be taken for
               * end-of-data and cut the transfer short.
               */
              off = 0;
              i++;
              continue;
          }

          base = (char *)iov[i].iov_base + off;
          r = do_write ? write(fd, base, todo) : read(fd, base, todo);

          if (r > 0) {
              total += r;
              off += r;
          } else if (r == 0) {
              break;
          } else if (errno != EINTR) {
              /* some "other" error: only report it if no data was processed */
              if (total == 0) {
                  total = -1;
              }
              break;
          }
      }

      return total;
  }
  /*
   * readv() replacement for hosts without CONFIG_IOVEC; reads into the
   * @iov_cnt elements of @iov through readv_writev().
   */
  ssize_t
  readv(int fd, const struct iovec *iov, int iov_cnt)
  {
      const bool do_write = false;

      return readv_writev(fd, iov, iov_cnt, do_write);
  }
  /*
   * writev() replacement for hosts without CONFIG_IOVEC; writes the
   * @iov_cnt elements of @iov through readv_writev().
   */
  ssize_t
  writev(int fd, const struct iovec *iov, int iov_cnt)
  {
      const bool do_write = true;

      return readv_writev(fd, iov, iov_cnt, do_write);
  }
  522. #endif
  /*
   * Make sure the data of @fd reaches the disk, preferably without writing
   * out the inode just for timestamp updates.
   *
   * fdatasync() is used when CONFIG_FDATASYNC is set; hosts without it fall
   * back to fsync().  Returns the result of whichever call was made.
   */
  int qemu_fdatasync(int fd)
  {
      int rc;

  #ifdef CONFIG_FDATASYNC
      rc = fdatasync(fd);
  #else
      rc = fsync(fd);
  #endif
      return rc;
  }