|
@@ -18,6 +18,8 @@
|
|
|
#include "block/block.h"
|
|
|
#include "qemu/queue.h"
|
|
|
#include "qemu/sockets.h"
|
|
|
+#include "qemu/cutils.h"
|
|
|
+#include "trace.h"
|
|
|
#ifdef CONFIG_EPOLL_CREATE1
|
|
|
#include <sys/epoll.h>
|
|
|
#endif
|
|
@@ -27,6 +29,9 @@ struct AioHandler
|
|
|
GPollFD pfd;
|
|
|
IOHandler *io_read;
|
|
|
IOHandler *io_write;
|
|
|
+ AioPollFn *io_poll;
|
|
|
+ IOHandler *io_poll_begin;
|
|
|
+ IOHandler *io_poll_end;
|
|
|
int deleted;
|
|
|
void *opaque;
|
|
|
bool is_external;
|
|
@@ -200,6 +205,7 @@ void aio_set_fd_handler(AioContext *ctx,
|
|
|
bool is_external,
|
|
|
IOHandler *io_read,
|
|
|
IOHandler *io_write,
|
|
|
+ AioPollFn *io_poll,
|
|
|
void *opaque)
|
|
|
{
|
|
|
AioHandler *node;
|
|
@@ -209,7 +215,7 @@ void aio_set_fd_handler(AioContext *ctx,
|
|
|
node = find_aio_handler(ctx, fd);
|
|
|
|
|
|
/* Are we deleting the fd handler? */
|
|
|
- if (!io_read && !io_write) {
|
|
|
+ if (!io_read && !io_write && !io_poll) {
|
|
|
if (node == NULL) {
|
|
|
return;
|
|
|
}
|
|
@@ -228,6 +234,10 @@ void aio_set_fd_handler(AioContext *ctx,
|
|
|
QLIST_REMOVE(node, node);
|
|
|
deleted = true;
|
|
|
}
|
|
|
+
|
|
|
+ if (!node->io_poll) {
|
|
|
+ ctx->poll_disable_cnt--;
|
|
|
+ }
|
|
|
} else {
|
|
|
if (node == NULL) {
|
|
|
/* Alloc and insert if it's not already there */
|
|
@@ -237,10 +247,16 @@ void aio_set_fd_handler(AioContext *ctx,
|
|
|
|
|
|
g_source_add_poll(&ctx->source, &node->pfd);
|
|
|
is_new = true;
|
|
|
+
|
|
|
+ ctx->poll_disable_cnt += !io_poll;
|
|
|
+ } else {
|
|
|
+ ctx->poll_disable_cnt += !io_poll - !node->io_poll;
|
|
|
}
|
|
|
+
|
|
|
/* Update handler with latest information */
|
|
|
node->io_read = io_read;
|
|
|
node->io_write = io_write;
|
|
|
+ node->io_poll = io_poll;
|
|
|
node->opaque = opaque;
|
|
|
node->is_external = is_external;
|
|
|
|
|
@@ -250,22 +266,83 @@ void aio_set_fd_handler(AioContext *ctx,
|
|
|
|
|
|
aio_epoll_update(ctx, node, is_new);
|
|
|
aio_notify(ctx);
|
|
|
+
|
|
|
if (deleted) {
|
|
|
g_free(node);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+void aio_set_fd_poll(AioContext *ctx, int fd,
|
|
|
+ IOHandler *io_poll_begin,
|
|
|
+ IOHandler *io_poll_end)
|
|
|
+{
|
|
|
+ AioHandler *node = find_aio_handler(ctx, fd);
|
|
|
+ /* Sets the poll begin/end callbacks on the handler found for fd; if find_aio_handler() yields no node, nothing is changed. */
|
|
|
+ if (!node) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ /* These two fields are what poll_set_started() invokes when poll mode is entered (begin) or left (end). */
|
|
|
+ node->io_poll_begin = io_poll_begin;
|
|
|
+ node->io_poll_end = io_poll_end;
|
|
|
+}
|
|
|
+
|
|
|
void aio_set_event_notifier(AioContext *ctx,
|
|
|
EventNotifier *notifier,
|
|
|
bool is_external,
|
|
|
- EventNotifierHandler *io_read)
|
|
|
+ EventNotifierHandler *io_read,
|
|
|
+ AioPollFn *io_poll) /* forwarded unchanged as the io_poll argument of aio_set_fd_handler() */
|
|
|
+{
|
|
|
+ aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
|
|
|
+ (IOHandler *)io_read, NULL, io_poll, notifier); /* io_write is NULL; the notifier itself is passed as the opaque argument */
|
|
|
+}
|
|
|
+
|
|
|
+void aio_set_event_notifier_poll(AioContext *ctx,
|
|
|
+ EventNotifier *notifier,
|
|
|
+ EventNotifierHandler *io_poll_begin,
|
|
|
+ EventNotifierHandler *io_poll_end) /* EventNotifier wrapper around aio_set_fd_poll(), keyed by event_notifier_get_fd(notifier) */
|
|
|
{
|
|
|
- aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
|
|
|
- is_external, (IOHandler *)io_read, NULL, notifier);
|
|
|
+ aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
|
|
|
+ (IOHandler *)io_poll_begin,
|
|
|
+ (IOHandler *)io_poll_end); /* callbacks are cast to IOHandler, the same way aio_set_event_notifier() casts io_read */
|
|
|
}
|
|
|
|
|
|
+static void poll_set_started(AioContext *ctx, bool started)
|
|
|
+{
|
|
|
+ AioHandler *node;
|
|
|
+ /* Switches ctx into or out of poll mode and tells each live handler through its io_poll_begin/io_poll_end callback; does nothing if the state is unchanged. */
|
|
|
+ if (started == ctx->poll_started) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ ctx->poll_started = started;
|
|
|
+ /* walking_handlers is raised for the duration of the list walk; NOTE(review): presumably so a callback that removes a handler only marks it deleted - confirm against aio_set_fd_handler(). */
|
|
|
+ ctx->walking_handlers++;
|
|
|
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
|
|
+ IOHandler *fn;
|
|
|
+ /* Handlers flagged as deleted receive no notification. */
|
|
|
+ if (node->deleted) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ /* Choose the begin or the end callback according to the new state. */
|
|
|
+ if (started) {
|
|
|
+ fn = node->io_poll_begin;
|
|
|
+ } else {
|
|
|
+ fn = node->io_poll_end;
|
|
|
+ }
|
|
|
+ /* Either callback may be NULL, in which case the handler is skipped. */
|
|
|
+ if (fn) {
|
|
|
+ fn(node->opaque);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ctx->walking_handlers--;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
bool aio_prepare(AioContext *ctx)
|
|
|
{
|
|
|
+ /* Poll mode cannot be used with glib's event loop, disable it. */
|
|
|
+ poll_set_started(ctx, false);
|
|
|
+ /* Always returns false; NOTE(review): presumably the GSource "prepare" convention for "not ready yet" - confirm against the caller. */
|
|
|
return false;
|
|
|
}
|
|
|
|
|
@@ -290,9 +367,13 @@ bool aio_pending(AioContext *ctx)
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
-bool aio_dispatch(AioContext *ctx)
|
|
|
+/*
|
|
|
+ * Note that dispatch_fds == false has the side-effect of postponing the
|
|
|
+ * freeing of deleted handlers.
|
|
|
+ */
|
|
|
+bool aio_dispatch(AioContext *ctx, bool dispatch_fds)
|
|
|
{
|
|
|
- AioHandler *node;
|
|
|
+ AioHandler *node = NULL;
|
|
|
bool progress = false;
|
|
|
|
|
|
/*
|
|
@@ -308,7 +389,9 @@ bool aio_dispatch(AioContext *ctx)
|
|
|
* We have to walk very carefully in case aio_set_fd_handler is
|
|
|
* called while we're walking.
|
|
|
*/
|
|
|
- node = QLIST_FIRST(&ctx->aio_handlers);
|
|
|
+ if (dispatch_fds) {
|
|
|
+ node = QLIST_FIRST(&ctx->aio_handlers);
|
|
|
+ }
|
|
|
while (node) {
|
|
|
AioHandler *tmp;
|
|
|
int revents;
|
|
@@ -400,12 +483,100 @@ static void add_pollfd(AioHandler *node)
|
|
|
npfd++;
|
|
|
}
|
|
|
|
|
|
+static bool run_poll_handlers_once(AioContext *ctx)
|
|
|
+{
|
|
|
+ bool progress = false;
|
|
|
+ AioHandler *node;
|
|
|
+ /* One pass over the handler list: call io_poll on every non-deleted handler that has one; there is no early exit, so all of them run even after one reports progress. */
|
|
|
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
|
|
+ if (!node->deleted && node->io_poll &&
|
|
|
+ node->io_poll(node->opaque)) {
|
|
|
+ progress = true;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Caller handles freeing deleted nodes. Don't do it here. */
|
|
|
+ }
|
|
|
+ /* true if at least one io_poll callback returned true during this pass */
|
|
|
+ return progress;
|
|
|
+}
|
|
|
+
|
|
|
+/* run_poll_handlers:
|
|
|
+ * @ctx: the AioContext
|
|
|
+ * @max_ns: maximum time to poll for, in nanoseconds
|
|
|
+ *
|
|
|
+ * Busy-polls by calling run_poll_handlers_once() in a loop until a handler reports progress or max_ns nanoseconds have elapsed; at least one pass is always made.
|
|
|
+ *
|
|
|
+ * Note that ctx->notify_me must be non-zero so this function can detect
|
|
|
+ * aio_notify().
|
|
|
+ *
|
|
|
+ * Note that the caller must have incremented ctx->walking_handlers.
|
|
|
+ *
|
|
|
+ * Returns: true if progress was made, false otherwise
|
|
|
+ */
|
|
|
+static bool run_poll_handlers(AioContext *ctx, int64_t max_ns)
|
|
|
+{
|
|
|
+ bool progress;
|
|
|
+ int64_t end_time;
|
|
|
+ /* Preconditions: notify_me is set, the caller is walking the handler list, and poll_disable_cnt (handlers registered without an io_poll callback) is zero. */
|
|
|
+ assert(ctx->notify_me);
|
|
|
+ assert(ctx->walking_handlers > 0);
|
|
|
+ assert(ctx->poll_disable_cnt == 0);
|
|
|
+
|
|
|
+ trace_run_poll_handlers_begin(ctx, max_ns);
|
|
|
+ /* The deadline is computed once, before the first polling pass. */
|
|
|
+ end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns;
|
|
|
+ /* Repeat full passes until one makes progress or the deadline has passed. */
|
|
|
+ do {
|
|
|
+ progress = run_poll_handlers_once(ctx);
|
|
|
+ } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time);
|
|
|
+
|
|
|
+ trace_run_poll_handlers_end(ctx, progress);
|
|
|
+
|
|
|
+ return progress;
|
|
|
+}
|
|
|
+
|
|
|
+/* try_poll_mode:
|
|
|
+ * @ctx: the AioContext
|
|
|
+ * @blocking: busy polling is only attempted when blocking is true
|
|
|
+ * Busy-polls when permitted; if that is skipped or makes no progress, leaves poll mode and does one final polling pass.
|
|
|
+ * ctx->notify_me must be non-zero so this function can detect aio_notify().
|
|
|
+ *
|
|
|
+ * Note that the caller must have incremented ctx->walking_handlers.
|
|
|
+ *
|
|
|
+ * Returns: true if progress was made, false otherwise
|
|
|
+ */
|
|
|
+static bool try_poll_mode(AioContext *ctx, bool blocking)
|
|
|
+{
|
|
|
+ if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
|
|
|
+ /* See qemu_soonest_timeout() uint64_t hack */
|
|
|
+ int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
|
|
|
+ (uint64_t)ctx->poll_ns);
|
|
|
+ /* max_ns is zero when either the computed timeout or the current ctx->poll_ns is zero; busy polling is then skipped. */
|
|
|
+ if (max_ns) {
|
|
|
+ poll_set_started(ctx, true);
|
|
|
+
|
|
|
+ if (run_poll_handlers(ctx, max_ns)) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* Reached when busy polling was skipped or made no progress: leave poll mode, which runs the io_poll_end callbacks if poll mode was active. */
|
|
|
+ poll_set_started(ctx, false);
|
|
|
+
|
|
|
+ /* Even if we don't run busy polling, try polling once in case it can make
|
|
|
+ * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
|
|
|
+ */
|
|
|
+ return run_poll_handlers_once(ctx);
|
|
|
+}
|
|
|
+
|
|
|
bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
{
|
|
|
AioHandler *node;
|
|
|
- int i, ret;
|
|
|
+ int i;
|
|
|
+ int ret = 0;
|
|
|
bool progress;
|
|
|
int64_t timeout;
|
|
|
+ int64_t start = 0;
|
|
|
|
|
|
aio_context_acquire(ctx);
|
|
|
progress = false;
|
|
@@ -423,41 +594,91 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
|
|
|
ctx->walking_handlers++;
|
|
|
|
|
|
- assert(npfd == 0);
|
|
|
+ if (ctx->poll_max_ns) {
|
|
|
+ start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (try_poll_mode(ctx, blocking)) {
|
|
|
+ progress = true;
|
|
|
+ } else {
|
|
|
+ assert(npfd == 0);
|
|
|
|
|
|
- /* fill pollfds */
|
|
|
+ /* fill pollfds */
|
|
|
|
|
|
- if (!aio_epoll_enabled(ctx)) {
|
|
|
- QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
|
|
- if (!node->deleted && node->pfd.events
|
|
|
- && aio_node_check(ctx, node->is_external)) {
|
|
|
- add_pollfd(node);
|
|
|
+ if (!aio_epoll_enabled(ctx)) {
|
|
|
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
|
|
+ if (!node->deleted && node->pfd.events
|
|
|
+ && aio_node_check(ctx, node->is_external)) {
|
|
|
+ add_pollfd(node);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- timeout = blocking ? aio_compute_timeout(ctx) : 0;
|
|
|
+ timeout = blocking ? aio_compute_timeout(ctx) : 0;
|
|
|
|
|
|
- /* wait until next event */
|
|
|
- if (timeout) {
|
|
|
- aio_context_release(ctx);
|
|
|
+ /* wait until next event */
|
|
|
+ if (timeout) {
|
|
|
+ aio_context_release(ctx);
|
|
|
+ }
|
|
|
+ if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
|
|
|
+ AioHandler epoll_handler;
|
|
|
+
|
|
|
+ epoll_handler.pfd.fd = ctx->epollfd;
|
|
|
+ epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
|
|
|
+ npfd = 0;
|
|
|
+ add_pollfd(&epoll_handler);
|
|
|
+ ret = aio_epoll(ctx, pollfds, npfd, timeout);
|
|
|
+ } else {
|
|
|
+ ret = qemu_poll_ns(pollfds, npfd, timeout);
|
|
|
+ }
|
|
|
+ if (timeout) {
|
|
|
+ aio_context_acquire(ctx);
|
|
|
+ }
|
|
|
}
|
|
|
- if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
|
|
|
- AioHandler epoll_handler;
|
|
|
|
|
|
- epoll_handler.pfd.fd = ctx->epollfd;
|
|
|
- epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
|
|
|
- npfd = 0;
|
|
|
- add_pollfd(&epoll_handler);
|
|
|
- ret = aio_epoll(ctx, pollfds, npfd, timeout);
|
|
|
- } else {
|
|
|
- ret = qemu_poll_ns(pollfds, npfd, timeout);
|
|
|
- }
|
|
|
if (blocking) {
|
|
|
atomic_sub(&ctx->notify_me, 2);
|
|
|
}
|
|
|
- if (timeout) {
|
|
|
- aio_context_acquire(ctx);
|
|
|
+
|
|
|
+ /* Adjust polling time */
|
|
|
+ if (ctx->poll_max_ns) {
|
|
|
+ int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
|
|
|
+
|
|
|
+ if (block_ns <= ctx->poll_ns) {
|
|
|
+ /* This is the sweet spot, no adjustment needed */
|
|
|
+ } else if (block_ns > ctx->poll_max_ns) {
|
|
|
+ /* We'd have to poll for too long, poll less */
|
|
|
+ int64_t old = ctx->poll_ns;
|
|
|
+
|
|
|
+ if (ctx->poll_shrink) {
|
|
|
+ ctx->poll_ns /= ctx->poll_shrink;
|
|
|
+ } else {
|
|
|
+ ctx->poll_ns = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ trace_poll_shrink(ctx, old, ctx->poll_ns);
|
|
|
+ } else if (ctx->poll_ns < ctx->poll_max_ns &&
|
|
|
+ block_ns < ctx->poll_max_ns) {
|
|
|
+ /* There is room to grow, poll longer */
|
|
|
+ int64_t old = ctx->poll_ns;
|
|
|
+ int64_t grow = ctx->poll_grow;
|
|
|
+
|
|
|
+ if (grow == 0) {
|
|
|
+ grow = 2;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ctx->poll_ns) {
|
|
|
+ ctx->poll_ns *= grow;
|
|
|
+ } else {
|
|
|
+ ctx->poll_ns = 4000; /* start polling at 4 microseconds */
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ctx->poll_ns > ctx->poll_max_ns) {
|
|
|
+ ctx->poll_ns = ctx->poll_max_ns;
|
|
|
+ }
|
|
|
+
|
|
|
+ trace_poll_grow(ctx, old, ctx->poll_ns);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
aio_notify_accept(ctx);
|
|
@@ -473,7 +694,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
ctx->walking_handlers--;
|
|
|
|
|
|
/* Run dispatch even if there were no readable fds to run timers */
|
|
|
- if (aio_dispatch(ctx)) {
|
|
|
+ if (aio_dispatch(ctx, ret > 0)) {
|
|
|
progress = true;
|
|
|
}
|
|
|
|
|
@@ -484,6 +705,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
|
|
|
|
|
|
void aio_context_setup(AioContext *ctx)
|
|
|
{
|
|
|
+ /* TODO remove this in final patch submission */
|
|
|
+ if (getenv("QEMU_AIO_POLL_MAX_NS")) {
|
|
|
+ fprintf(stderr, "The QEMU_AIO_POLL_MAX_NS environment variable has "
|
|
|
+ "been replaced with -object iothread,poll-max-ns=NUM\n");
|
|
|
+ exit(1);
|
|
|
+ }
|
|
|
+
|
|
|
#ifdef CONFIG_EPOLL_CREATE1
|
|
|
assert(!ctx->epollfd);
|
|
|
ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
|
|
@@ -495,3 +723,17 @@ void aio_context_setup(AioContext *ctx)
|
|
|
}
|
|
|
#endif
|
|
|
}
|
|
|
+
|
|
|
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
|
|
|
+ int64_t grow, int64_t shrink, Error **errp) /* errp is never written by this function */
|
|
|
+{
|
|
|
+ /* No thread synchronization here, it doesn't matter if an incorrect value
|
|
|
+ * is used once.
|
|
|
+ */
|
|
|
+ ctx->poll_max_ns = max_ns; /* 0 turns polling off: try_poll_mode() and the adjustment code in aio_poll() both test this field */
|
|
|
+ ctx->poll_ns = 0; /* the current polling interval restarts from zero; aio_poll() grows it again from 4000 ns */
|
|
|
+ ctx->poll_grow = grow; /* 0 is treated as a factor of 2 by aio_poll() */
|
|
|
+ ctx->poll_shrink = shrink; /* 0 makes aio_poll() drop poll_ns straight to 0 instead of dividing */
|
|
|
+ /* NOTE(review): presumably notifies the context so a blocked event loop picks up the new values - confirm aio_notify() semantics. */
|
|
|
+ aio_notify(ctx);
|
|
|
+}
|