123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615 |
- /*
- * QEMU low level functions
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
- #include "qemu/osdep.h"
- #include "qapi/error.h"
- #include "qemu/cutils.h"
- #include "qemu/sockets.h"
- #include "qemu/error-report.h"
- #include "qemu/madvise.h"
- #include "qemu/mprotect.h"
- #include "qemu/hw-version.h"
- #include "monitor/monitor.h"
- static const char *hw_version = QEMU_HW_VERSION; /* hardware version string: starts as QEMU_HW_VERSION, replaced by qemu_set_hw_version(), read by qemu_hw_version() */
/*
 * Set the TCP_CORK option of socket @fd to @v.
 *
 * When the host headers provide no SOL_TCP or TCP_CORK the call does
 * nothing and reports success.
 *
 * Returns: the setsockopt() result, or 0 when the option is unavailable.
 */
int socket_set_cork(int fd, int v)
{
    int rc = 0;

#if defined(SOL_TCP) && defined(TCP_CORK)
    rc = setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#endif
    return rc;
}
/*
 * Enable TCP_NODELAY on socket @fd.
 *
 * Returns: the setsockopt() result (0 on success, -1 with errno set on
 * failure).
 */
int socket_set_nodelay(int fd)
{
    const int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
}
- int qemu_madvise(void *addr, size_t len, int advice)
- {
- if (advice == QEMU_MADV_INVALID) {
- errno = EINVAL;
- return -1;
- }
- #if defined(CONFIG_MADVISE)
- return madvise(addr, len, advice);
- #elif defined(CONFIG_POSIX_MADVISE)
- int rc = posix_madvise(addr, len, advice);
- if (rc) {
- errno = rc;
- return -1;
- }
- return 0;
- #else
- errno = ENOSYS;
- return -1;
- #endif
- }
/*
 * Change the protection of the @size bytes at @addr to the host-specific
 * value @prot (a PAGE_* constant for VirtualProtect() on Windows, a
 * PROT_* mask for mprotect() elsewhere).
 *
 * Both @addr and @size must be multiples of the real host page size;
 * this is asserted.
 *
 * Returns: 0 on success, -1 after reporting the failure via error_report().
 */
static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
{
    g_assert(((uintptr_t)addr & ~qemu_real_host_page_mask()) == 0);
    g_assert((size & ~qemu_real_host_page_mask()) == 0);

#ifdef _WIN32
    DWORD previous;

    if (VirtualProtect(addr, size, prot, &previous)) {
        return 0;
    }

    g_autofree gchar *emsg = g_win32_error_message(GetLastError());
    error_report("%s: VirtualProtect failed: %s", __func__, emsg);
    return -1;
#else
    if (mprotect(addr, size, prot) == 0) {
        return 0;
    }

    error_report("%s: mprotect failed: %s", __func__, strerror(errno));
    return -1;
#endif
}
/*
 * Make the page-aligned region [@addr, @addr + @size) readable and
 * writable.
 *
 * Returns: the result of qemu_mprotect__osdep() (0 on success, -1 on error).
 */
int qemu_mprotect_rw(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
/*
 * Make the page-aligned region [@addr, @addr + @size) readable, writable
 * and executable.
 *
 * Returns: the result of qemu_mprotect__osdep() (0 on success, -1 on error).
 */
int qemu_mprotect_rwx(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_EXECUTE_READWRITE;
#else
    const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
/*
 * Remove all access rights from the page-aligned region
 * [@addr, @addr + @size).
 *
 * Returns: the result of qemu_mprotect__osdep() (0 on success, -1 on error).
 */
int qemu_mprotect_none(void *addr, size_t size)
{
#ifdef _WIN32
    const int prot = PAGE_NOACCESS;
#else
    const int prot = PROT_NONE;
#endif

    return qemu_mprotect__osdep(addr, size, prot);
}
- #ifndef _WIN32
- static int fcntl_op_setlk = -1; /* fcntl() command used to take/release locks; -1 until qemu_probe_lock_ops() has chosen one */
- static int fcntl_op_getlk = -1; /* fcntl() command used to query locks; assigned together with fcntl_op_setlk */
/*
 * Duplicate @fd and apply @flags to the duplicate.
 *
 * The O_SYNC state of @flags must match that of the duplicated
 * descriptor, otherwise the call fails with EINVAL.  The remaining flags
 * are applied with fcntl(F_SETFL).  The file is truncated to zero length
 * when @flags contains O_TRUNC, or both O_CREAT and O_EXCL.
 *
 * Returns: the new descriptor, or -1 with errno set.  On failure the
 * duplicate is closed and the errno of the failing step is preserved.
 */
int qemu_dup_flags(int fd, int flags)
{
    int saved_errno;
    int cur_flags;
    bool wants_truncate;
    int newfd = qemu_dup(fd);

    if (newfd == -1) {
        /* Nothing to clean up; errno is the one left by qemu_dup() */
        return -1;
    }

    cur_flags = fcntl(newfd, F_GETFL);
    if (cur_flags == -1) {
        goto fail_close;
    }

    if ((flags ^ cur_flags) & O_SYNC) {
        errno = EINVAL;
        goto fail_close;
    }

    /* Set/unset flags that we can with fcntl */
    if (fcntl(newfd, F_SETFL, flags) == -1) {
        goto fail_close;
    }

    /* Truncate the file in the cases that open() would truncate it */
    wants_truncate = (flags & O_TRUNC) ||
                     (flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL);
    if (wants_truncate && ftruncate(newfd, 0) == -1) {
        goto fail_close;
    }

    return newfd;

fail_close:
    /* close() may clobber errno, so keep the original failure reason */
    saved_errno = errno;
    close(newfd);
    errno = saved_errno;
    return -1;
}
/*
 * Duplicate @fd with close-on-exec set on the copy.
 *
 * Uses fcntl(F_DUPFD_CLOEXEC) when available; otherwise falls back to
 * dup() followed by qemu_set_cloexec().
 *
 * Returns: the new descriptor, or -1 on failure.
 */
int qemu_dup(int fd)
{
#ifdef F_DUPFD_CLOEXEC
    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
#else
    int copy = dup(fd);

    if (copy != -1) {
        qemu_set_cloexec(copy);
    }
    return copy;
#endif
}
/*
 * Parse the fd set id in @param.
 *
 * Thin wrapper that forwards to qemu_parse_fd() and returns its result
 * unchanged.
 */
static int qemu_parse_fdset(const char *param)
{
    int fdset_id = qemu_parse_fd(param);

    return fdset_id;
}
- static void qemu_probe_lock_ops(void)
- {
- if (fcntl_op_setlk == -1) {
- #ifdef F_OFD_SETLK
- int fd;
- int ret;
- struct flock fl = {
- .l_whence = SEEK_SET,
- .l_start = 0,
- .l_len = 0,
- .l_type = F_WRLCK,
- };
- fd = open("/dev/null", O_RDWR);
- if (fd < 0) {
- fprintf(stderr,
- "Failed to open /dev/null for OFD lock probing: %s\n",
- strerror(errno));
- fcntl_op_setlk = F_SETLK;
- fcntl_op_getlk = F_GETLK;
- return;
- }
- ret = fcntl(fd, F_OFD_GETLK, &fl);
- close(fd);
- if (!ret) {
- fcntl_op_setlk = F_OFD_SETLK;
- fcntl_op_getlk = F_OFD_GETLK;
- } else {
- fcntl_op_setlk = F_SETLK;
- fcntl_op_getlk = F_GETLK;
- }
- #else
- fcntl_op_setlk = F_SETLK;
- fcntl_op_getlk = F_GETLK;
- #endif
- }
- }
/*
 * Report whether the OFD lock commands were selected by
 * qemu_probe_lock_ops().  Always false when F_OFD_SETLK is not defined.
 */
bool qemu_has_ofd_lock(void)
{
    bool have_ofd = false;

    qemu_probe_lock_ops();
#ifdef F_OFD_SETLK
    have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
#endif
    return have_ofd;
}
- static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
- {
- int ret;
- struct flock fl = {
- .l_whence = SEEK_SET,
- .l_start = start,
- .l_len = len,
- .l_type = fl_type,
- };
- qemu_probe_lock_ops();
- ret = RETRY_ON_EINTR(fcntl(fd, fcntl_op_setlk, &fl));
- return ret == -1 ? -errno : 0;
- }
/*
 * Lock the byte range [@start, @start + @len) of @fd: a write lock when
 * @exclusive is true, a read lock otherwise.
 *
 * Returns: 0 on success, -errno on failure.
 */
int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
{
    int lock_type = F_RDLCK;

    if (exclusive) {
        lock_type = F_WRLCK;
    }
    return qemu_lock_fcntl(fd, start, len, lock_type);
}
/*
 * Release the lock on the byte range [@start, @start + @len) of @fd.
 *
 * Returns: 0 on success, -errno on failure.
 */
int qemu_unlock_fd(int fd, int64_t start, int64_t len)
{
    const int lock_type = F_UNLCK;

    return qemu_lock_fcntl(fd, start, len, lock_type);
}
- int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
- {
- int ret;
- struct flock fl = {
- .l_whence = SEEK_SET,
- .l_start = start,
- .l_len = len,
- .l_type = exclusive ? F_WRLCK : F_RDLCK,
- };
- qemu_probe_lock_ops();
- ret = fcntl(fd, fcntl_op_getlk, &fl);
- if (ret == -1) {
- return -errno;
- } else {
- return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
- }
- }
- #endif
/*
 * Report whether this build was compiled with O_DIRECT defined.
 * This is a compile-time property; nothing is probed at run time.
 */
bool qemu_has_direct_io(void)
{
    bool supported = false;

#ifdef O_DIRECT
    supported = true;
#endif
    return supported;
}
/*
 * open() @name with @flags and @mode, with close-on-exec set on the
 * resulting descriptor.
 *
 * O_CLOEXEC is added to the open() flags when it is defined; otherwise
 * qemu_set_cloexec() is applied after a successful open().
 *
 * Returns: the open() result.
 */
static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
{
#ifdef O_CLOEXEC
    return open(name, flags | O_CLOEXEC, mode);
#else
    int fd = open(name, flags, mode);

    if (fd >= 0) {
        qemu_set_cloexec(fd);
    }
    return fd;
#endif
}
- /*
- * Opens a file with FD_CLOEXEC set
- */
- static int
- qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
- {
- int ret;
- #ifndef _WIN32
- const char *fdset_id_str;
- /* Attempt dup of fd from fd set */
- if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
- int64_t fdset_id;
- fdset_id = qemu_parse_fdset(fdset_id_str);
- if (fdset_id == -1) {
- error_setg(errp, "Could not parse fdset %s", name);
- errno = EINVAL;
- return -1;
- }
- return monitor_fdset_dup_fd_add(fdset_id, flags, errp);
- }
- #endif
- ret = qemu_open_cloexec(name, flags, mode);
- if (ret == -1) {
- const char *action = flags & O_CREAT ? "create" : "open";
- #ifdef O_DIRECT
- /* Give more helpful error message for O_DIRECT */
- if (errno == EINVAL && (flags & O_DIRECT)) {
- ret = open(name, flags & ~O_DIRECT, mode);
- if (ret != -1) {
- close(ret);
- error_setg(errp, "Could not %s '%s': "
- "filesystem does not support O_DIRECT",
- action, name);
- errno = EINVAL; /* restore first open()'s errno */
- return -1;
- }
- }
- #endif /* O_DIRECT */
- error_setg_errno(errp, errno, "Could not %s '%s'",
- action, name);
- }
- return ret;
- }
- int qemu_open(const char *name, int flags, Error **errp)
- {
- assert(!(flags & O_CREAT));
- return qemu_open_internal(name, flags, 0, errp);
- }
- int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
- {
- assert(!(flags & O_CREAT));
- return qemu_open_internal(name, flags | O_CREAT, mode, errp);
- }
/*
 * Variadic open() look-alike: open @name with @flags and FD_CLOEXEC set.
 *
 * The optional third argument (file mode) is read only when @flags
 * contains O_CREAT; otherwise a mode of 0 is used.  No Error object is
 * produced; a hint is printed via error_report() when the failure is
 * EINVAL and O_DIRECT was requested.
 *
 * Returns: a descriptor on success, -1 with errno set on failure.
 */
int qemu_open_old(const char *name, int flags, ...)
{
    mode_t mode = 0;
    int fd;

    if (flags & O_CREAT) {
        va_list args;

        va_start(args, flags);
        mode = va_arg(args, int);
        va_end(args);
    }

    fd = qemu_open_internal(name, flags, mode, NULL);

#ifdef O_DIRECT
    if (fd == -1 && (flags & O_DIRECT) && errno == EINVAL) {
        error_report("file system may not support O_DIRECT");
        errno = EINVAL; /* in case it was clobbered */
    }
#endif /* O_DIRECT */

    return fd;
}
/*
 * Close @fd.
 *
 * monitor_fdset_dup_fd_remove() is called first so that a descriptor
 * dup'd from an fdset is dropped from the monitor's bookkeeping.
 *
 * Returns: the close() result.
 */
int qemu_close(int fd)
{
    int rc;

    /* Close fd that was dup'd from an fdset */
    monitor_fdset_dup_fd_remove(fd);
    rc = close(fd);
    return rc;
}
/*
 * Remove @name from the filesystem.  Names starting with "/dev/fdset/"
 * do not refer to filesystem entries and are silently accepted without
 * calling unlink().
 *
 * Returns: 0 on success; -1 with errno set when unlink() fails.
 */
int qemu_unlink(const char *name)
{
    const bool is_fdset_path = g_str_has_prefix(name, "/dev/fdset/");

    return is_fdset_path ? 0 : unlink(name);
}
/*
 * A variant of write(2) which handles partial writes.
 *
 * Keeps calling write() until all @count bytes of @buf have been written
 * to @fd or an error other than EINTR occurs.
 *
 * Returns: the number of bytes transferred.  errno is set if fewer than
 * @count bytes were written.
 *
 * This function doesn't work with non-blocking fds.
 * Any of the possibilities with non-blocking fds is bad:
 *   - return a short write (then the name is wrong)
 *   - busy wait adding (errno == EAGAIN) to the loop
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    /* Byte cursor: arithmetic on void pointers is not ISO C */
    const unsigned char *cursor = buf;
    ssize_t total = 0;

    while (count > 0) {
        ssize_t written = write(fd, cursor, count);

        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        cursor += written;
        count -= (size_t)written;
        total += written;
    }

    return total;
}
/*
 * Create a socket with FD_CLOEXEC set.
 *
 * When SOCK_CLOEXEC is defined it is tried first; only if that attempt
 * fails with EINVAL is the socket created without it and marked
 * close-on-exec afterwards via qemu_set_cloexec().
 *
 * Returns: the socket descriptor, or -1 on failure.
 */
int qemu_socket(int domain, int type, int protocol)
{
    int sock;

#ifdef SOCK_CLOEXEC
    sock = socket(domain, type | SOCK_CLOEXEC, protocol);
    if (!(sock == -1 && errno == EINVAL)) {
        return sock;
    }
#endif

    sock = socket(domain, type, protocol);
    if (sock < 0) {
        return sock;
    }
    qemu_set_cloexec(sock);
    return sock;
}
/*
 * Accept a connection on listening socket @s and set FD_CLOEXEC on the
 * new descriptor.
 *
 * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only an
 * ENOSYS failure falls through to plain accept() followed by
 * qemu_set_cloexec().
 *
 * Returns: the connected descriptor, or -1 on failure.
 */
int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int conn;

#ifdef CONFIG_ACCEPT4
    conn = accept4(s, addr, addrlen, SOCK_CLOEXEC);
    if (!(conn == -1 && errno == ENOSYS)) {
        return conn;
    }
#endif

    conn = accept(s, addr, addrlen);
    if (conn < 0) {
        return conn;
    }
    qemu_set_cloexec(conn);
    return conn;
}
/*
 * A variant of send(2) which handles partial sends.
 *
 * Keeps calling send() (with flags 0) until all @count bytes of @buf
 * have been sent on socket @s or an error other than EINTR occurs.
 *
 * Returns: the number of bytes transferred.  When that is less than
 * @count, errno holds the error that stopped the transfer.
 */
ssize_t qemu_send_full(int s, const void *buf, size_t count)
{
    /* Byte cursor: arithmetic on void pointers is not ISO C */
    const unsigned char *cursor = buf;
    ssize_t total = 0;

    while (count > 0) {
        ssize_t sent = send(s, cursor, count, 0);

        if (sent < 0) {
            if (errno == EINTR) {
                continue;
            }
            break;
        }

        cursor += sent;
        count -= (size_t)sent;
        total += sent;
    }

    return total;
}
- void qemu_set_hw_version(const char *version)
- {
- hw_version = version;
- }
- const char *qemu_hw_version(void)
- {
- return hw_version;
- }
#ifdef _WIN32
/*
 * Windows only: shut Winsock down again by calling WSACleanup().
 * The return value is deliberately ignored.
 */
static void socket_cleanup(void)
{
    (void)WSACleanup();
}
#endif
/*
 * Initialise the host socket layer.
 *
 * On Windows this starts Winsock 2.2 and registers socket_cleanup() to
 * run at exit; on every other host there is nothing to do.
 *
 * Returns: 0 on success, -1 if WSAStartup() fails (the error code is
 * printed to stderr).
 */
int socket_init(void)
{
#ifdef _WIN32
    WSADATA data;
    int err = WSAStartup(MAKEWORD(2, 2), &data);

    if (err != 0) {
        /*
         * WSAStartup() returns the error code directly.  WSAGetLastError()
         * must not be used here: when startup fails, Winsock's per-thread
         * error storage has not been set up.
         */
        fprintf(stderr, "WSAStartup: %d\n", err);
        return -1;
    }
    atexit(socket_cleanup);
#endif
    return 0;
}
- #ifndef CONFIG_IOVEC
/*
 * Emulate readv()/writev() with a sequence of read()/write() calls.
 *
 * @fd:       descriptor to operate on
 * @iov:      array of @iov_cnt buffers, processed in order
 * @iov_cnt:  number of entries in @iov; a negative count fails with EINVAL
 * @do_write: true to write the buffers to @fd, false to fill them from @fd
 *
 * Each buffer is transferred completely before the next one is started.
 * Zero-length buffers are skipped: a zero-byte read()/write() returns 0,
 * which must not be mistaken for end-of-file.  EINTR is retried.
 *
 * Returns: the total number of bytes transferred; this may be short if
 * end-of-file or an error is hit after some data was processed.  -1 with
 * errno set if an error occurs before any data was transferred.
 */
static ssize_t
readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
{
    ssize_t total = 0;
    size_t off = 0;     /* bytes of iov[i] already transferred */
    int i = 0;

    if (iov_cnt < 0) {
        errno = EINVAL;
        return -1;
    }

    while (i < iov_cnt) {
        char *base = (char *)iov[i].iov_base + off;
        size_t remaining = iov[i].iov_len - off;
        ssize_t r;

        if (remaining == 0) {
            /* Empty segment: nothing to transfer, move on */
            off = 0;
            i++;
            continue;
        }

        r = do_write ? write(fd, base, remaining)
                     : read(fd, base, remaining);
        if (r > 0) {
            total += r;
            off += (size_t)r;
            if (off >= iov[i].iov_len) {
                /* Segment complete */
                off = 0;
                i++;
            }
        } else if (r == 0) {
            /* End of file, or the descriptor accepts no more data */
            break;
        } else if (errno != EINTR) {
            /*
             * Some "other" error: report it only if no data was
             * processed, otherwise return the partial count.
             */
            if (total == 0) {
                total = -1;
            }
            break;
        }
    }

    return total;
}
/*
 * Replacement readv() for hosts without CONFIG_IOVEC: fill the
 * @iov_cnt buffers of @iov from @fd via readv_writev().
 */
ssize_t
readv(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = false;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
/*
 * Replacement writev() for hosts without CONFIG_IOVEC: write the
 * @iov_cnt buffers of @iov to @fd via readv_writev().
 */
ssize_t
writev(int fd, const struct iovec *iov, int iov_cnt)
{
    const bool do_write = true;

    return readv_writev(fd, iov, iov_cnt, do_write);
}
- #endif
/*
 * Make sure the data of @fd reaches the disk, but where possible avoid
 * writing out the inode just for timestamp updates.
 *
 * fdatasync() is used when the build has CONFIG_FDATASYNC; hosts without
 * it fall back to fsync().
 *
 * Returns: the result of the underlying sync call.
 */
int qemu_fdatasync(int fd)
{
    int rc;

#ifdef CONFIG_FDATASYNC
    rc = fdatasync(fd);
#else
    rc = fsync(fd);
#endif
    return rc;
}
|