aio-posix.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. /*
  2. * QEMU aio implementation
  3. *
  4. * Copyright IBM, Corp. 2008
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. * Contributions after 2012-01-13 are licensed under the terms of the
  13. * GNU GPL, version 2 or (at your option) any later version.
  14. */
  15. #include "qemu-common.h"
  16. #include "block/block.h"
  17. #include "qemu/queue.h"
  18. #include "qemu/sockets.h"
/*
 * One file descriptor registered with an AioContext, plus the callbacks to
 * run when it becomes ready.  Instances live on ctx->aio_handlers.
 */
struct AioHandler
{
    /* fd being watched, the events requested for it, and the revents
     * recorded by the last poll; also registered with the context's
     * GSource via g_source_add_poll(). */
    GPollFD pfd;
    /* Called with opaque on G_IO_IN/G_IO_HUP/G_IO_ERR; may be NULL. */
    IOHandler *io_read;
    /* Called with opaque on G_IO_OUT/G_IO_ERR; may be NULL. */
    IOHandler *io_write;
    /* Set to 1 when removal is requested while ctx->walking_handlers is
     * non-zero; aio_dispatch() unlinks and frees such nodes later. */
    int deleted;
    /* Slot of this fd in ctx->pollfds for the current aio_poll() pass,
     * or -1 when the fd is not part of that pass. */
    int pollfds_idx;
    /* Argument handed to io_read and io_write. */
    void *opaque;
    /* Linkage in ctx->aio_handlers. */
    QLIST_ENTRY(AioHandler) node;
};
  29. static AioHandler *find_aio_handler(AioContext *ctx, int fd)
  30. {
  31. AioHandler *node;
  32. QLIST_FOREACH(node, &ctx->aio_handlers, node) {
  33. if (node->pfd.fd == fd)
  34. if (!node->deleted)
  35. return node;
  36. }
  37. return NULL;
  38. }
  39. void aio_set_fd_handler(AioContext *ctx,
  40. int fd,
  41. IOHandler *io_read,
  42. IOHandler *io_write,
  43. void *opaque)
  44. {
  45. AioHandler *node;
  46. node = find_aio_handler(ctx, fd);
  47. /* Are we deleting the fd handler? */
  48. if (!io_read && !io_write) {
  49. if (node) {
  50. g_source_remove_poll(&ctx->source, &node->pfd);
  51. /* If the lock is held, just mark the node as deleted */
  52. if (ctx->walking_handlers) {
  53. node->deleted = 1;
  54. node->pfd.revents = 0;
  55. } else {
  56. /* Otherwise, delete it for real. We can't just mark it as
  57. * deleted because deleted nodes are only cleaned up after
  58. * releasing the walking_handlers lock.
  59. */
  60. QLIST_REMOVE(node, node);
  61. g_free(node);
  62. }
  63. }
  64. } else {
  65. if (node == NULL) {
  66. /* Alloc and insert if it's not already there */
  67. node = g_malloc0(sizeof(AioHandler));
  68. node->pfd.fd = fd;
  69. QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
  70. g_source_add_poll(&ctx->source, &node->pfd);
  71. }
  72. /* Update handler with latest information */
  73. node->io_read = io_read;
  74. node->io_write = io_write;
  75. node->opaque = opaque;
  76. node->pollfds_idx = -1;
  77. node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
  78. node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
  79. }
  80. aio_notify(ctx);
  81. }
  82. void aio_set_event_notifier(AioContext *ctx,
  83. EventNotifier *notifier,
  84. EventNotifierHandler *io_read)
  85. {
  86. aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
  87. (IOHandler *)io_read, NULL, notifier);
  88. }
  89. bool aio_pending(AioContext *ctx)
  90. {
  91. AioHandler *node;
  92. QLIST_FOREACH(node, &ctx->aio_handlers, node) {
  93. int revents;
  94. revents = node->pfd.revents & node->pfd.events;
  95. if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
  96. return true;
  97. }
  98. if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
  99. return true;
  100. }
  101. }
  102. return false;
  103. }
  104. static bool aio_dispatch(AioContext *ctx)
  105. {
  106. AioHandler *node;
  107. bool progress = false;
  108. /*
  109. * We have to walk very carefully in case aio_set_fd_handler is
  110. * called while we're walking.
  111. */
  112. node = QLIST_FIRST(&ctx->aio_handlers);
  113. while (node) {
  114. AioHandler *tmp;
  115. int revents;
  116. ctx->walking_handlers++;
  117. revents = node->pfd.revents & node->pfd.events;
  118. node->pfd.revents = 0;
  119. if (!node->deleted &&
  120. (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
  121. node->io_read) {
  122. node->io_read(node->opaque);
  123. /* aio_notify() does not count as progress */
  124. if (node->opaque != &ctx->notifier) {
  125. progress = true;
  126. }
  127. }
  128. if (!node->deleted &&
  129. (revents & (G_IO_OUT | G_IO_ERR)) &&
  130. node->io_write) {
  131. node->io_write(node->opaque);
  132. progress = true;
  133. }
  134. tmp = node;
  135. node = QLIST_NEXT(node, node);
  136. ctx->walking_handlers--;
  137. if (!ctx->walking_handlers && tmp->deleted) {
  138. QLIST_REMOVE(tmp, node);
  139. g_free(tmp);
  140. }
  141. }
  142. /* Run our timers */
  143. progress |= timerlistgroup_run_timers(&ctx->tlg);
  144. return progress;
  145. }
/*
 * Run one pass of the event loop for @ctx.
 *
 * The pass runs bottom halves (aio_bh_poll), dispatches events that are
 * already recorded (aio_dispatch), and, unless that already made progress
 * in a non-blocking call, polls the registered file descriptors and
 * dispatches again.
 *
 * @ctx:      context to service
 * @blocking: if true, the poll waits up to timerlistgroup_deadline_ns();
 *            if false, or if bottom halves made progress, the poll timeout
 *            is 0
 *
 * Returns true if bottom halves or either aio_dispatch() call reported
 * progress, false otherwise.  ctx->dispatching is put back to the value it
 * had on entry before returning.
 */
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    bool was_dispatching;
    int ret;
    bool progress;

    /* Saved so the caller's state can be restored at "out". */
    was_dispatching = ctx->dispatching;
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This happens
     * in two cases:
     *
     * 1) when aio_poll is called with blocking == false
     *
     * 2) when we are called after poll().  If we are called before
     *    poll(), bottom halves will not be re-evaluated and we need
     *    aio_notify() if blocking == true.
     *
     * The first aio_dispatch() only does something when AioContext is
     * running as a GSource, and in that case aio_poll is used only
     * with blocking == false, so this optimization is already quite
     * effective.  However, the code is ugly and should be restructured
     * to have a single aio_dispatch() call.  To do this, we need to
     * reorganize aio_poll into a prepare/poll/dispatch model like
     * glib's.
     *
     * If we're in a nested event loop, ctx->dispatching might be true.
     * In that case we can restore it just before returning, but we
     * have to clear it now.
     */
    aio_set_dispatching(ctx, !blocking);

    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not block in poll in this case, because it is possible that the
     * caller does not need a complete flush (as is the case for aio_poll
     * loops).
     */
    if (aio_bh_poll(ctx)) {
        blocking = false;
        progress = true;
    }

    /* Re-evaluate condition (1) above: blocking may just have been cleared. */
    aio_set_dispatching(ctx, !blocking);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

    /*
     * Progress in a non-blocking call means we are done.  A blocking call
     * still goes on to poll even if the dispatch above made progress.
     */
    if (progress && !blocking) {
        goto out;
    }

    /*
     * Hold the walk counter while scanning so that a concurrent
     * aio_set_fd_handler() removal only flags nodes instead of freeing them.
     */
    ctx->walking_handlers++;

    /* Start from an empty pollfds array on every pass. */
    g_array_set_size(ctx->pollfds, 0);

    /* Fill pollfds with every live handler that requests some event,
     * recording each handler's slot so revents can be copied back. */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        node->pollfds_idx = -1;
        if (!node->deleted && node->pfd.events) {
            GPollFD pfd = {
                .fd = node->pfd.fd,
                .events = node->pfd.events,
            };
            node->pollfds_idx = ctx->pollfds->len;
            g_array_append_val(ctx->pollfds, pfd);
        }
    }

    ctx->walking_handlers--;

    /* Wait until the next event; a timeout of 0 is used when not blocking. */
    ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
                       ctx->pollfds->len,
                       blocking ? timerlistgroup_deadline_ns(&ctx->tlg) : 0);

    /*
     * If any fds are ready, copy their revents back into the handlers so
     * that aio_dispatch() below can act on them.
     * NOTE(review): a non-positive return (presumably timeout or error) is
     * not distinguished here; revents are simply left as they were.
     */
    if (ret > 0) {
        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
            if (node->pollfds_idx != -1) {
                GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
                                              node->pollfds_idx);
                node->pfd.revents = pfd->revents;
            }
        }
    }

    /* Run dispatch even if no fds were ready, so that timers still run. */
    aio_set_dispatching(ctx, true);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

out:
    /* Restore the dispatching state the caller had on entry. */
    aio_set_dispatching(ctx, was_dispatching);
    return progress;
}
  232. }