aio-win32.c

/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu/rcu_queue.h"

struct AioHandler {
    EventNotifier *e;
    IOHandler *io_read;
    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
};
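
/* Drop a handler from the list; defer the free while readers hold list_lock. */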
static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /* If aio_poll is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        node->deleted = 1;
        node->pfd.revents = 0;
    } else {
        /* Otherwise, delete it for real.  We can't just mark it as
         * deleted because deleted nodes are only cleaned up after
         * releasing the list_lock.
         */
        QLIST_REMOVE(node, node);
        g_free(node);
    }
}
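
/*
 * Register, update or remove the handlers for a socket.  Network readiness
 * is signalled through WSAEventSelect() on the AioContext notifier handle
 * and re-checked with a non-blocking select() in aio_prepare().
 */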
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
{
    /* fd is a SOCKET in our case */
    AioHandler *old_node;
    AioHandler *node = NULL;

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(old_node, &ctx->aio_handlers, node) {
        if (old_node->pfd.fd == fd && !old_node->deleted) {
            break;
        }
    }

    if (io_read || io_write) {
        HANDLE event;
        long bitmask = 0;

        /* Alloc and insert if it's not already there */
        node = g_new0(AioHandler, 1);
        node->pfd.fd = fd;

        node->pfd.events = 0;
        if (node->io_read) {
            node->pfd.events |= G_IO_IN;
        }
        if (node->io_write) {
            node->pfd.events |= G_IO_OUT;
        }

        node->e = &ctx->notifier;

        /* Update handler with latest information */
        node->opaque = opaque;
        node->io_read = io_read;
        node->io_write = io_write;
        node->is_external = is_external;

        if (io_read) {
            bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
        }

        if (io_write) {
            bitmask |= FD_WRITE | FD_CONNECT;
        }

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
        event = event_notifier_get_handle(&ctx->notifier);
        WSAEventSelect(node->pfd.fd, event, bitmask);
    }
    if (old_node) {
        aio_remove_fd_handler(ctx, old_node);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}

void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    /* Not implemented */
}
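
/*
 * Register or remove the callback that runs when an EventNotifier fires.
 * The notifier's HANDLE is also added to the GSource so that the main loop
 * can poll it.
 */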
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
                            EventNotifierHandler *io_notify,
                            AioPollFn *io_poll)
{
    AioHandler *node;

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the fd handler? */
    if (!io_notify) {
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

            aio_remove_fd_handler(ctx, node);
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_notify = io_notify;
    }

    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    /* Not implemented */
}
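
/*
 * Probe the registered sockets with a zero-timeout select() and record any
 * readiness in pfd.revents, so callers know whether socket work is pending
 * before they block.
 */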
bool aio_prepare(AioContext *ctx)
{
    static struct timeval tv0;
    AioHandler *node;
    bool have_select_revents = false;
    fd_set rfds, wfds;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
        if (node->io_write) {
            FD_SET ((SOCKET)node->pfd.fd, &wfds);
        }
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
                have_select_revents = true;
            }

            if (FD_ISSET(node->pfd.fd, &wfds)) {
                node->pfd.revents |= G_IO_OUT;
                have_select_revents = true;
            }
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
}

bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
            result = true;
            break;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
            result = true;
            break;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
            result = true;
            break;
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return result;
}
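
/*
 * Invoke the callbacks of every handler that has pending revents or whose
 * event notifier matches the signalled handle; deleted nodes are reaped
 * whenever the list_lock can be taken.
 */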
static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
{
    AioHandler *node;
    bool progress = false;
    AioHandler *tmp;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents = node->pfd.revents;

        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

            /* aio_notify() does not count as progress */
            if (node->e != &ctx->notifier) {
                progress = true;
            }
        }

        if (!node->deleted &&
            (node->io_read || node->io_write)) {
            node->pfd.revents = 0;
            if ((revents & G_IO_IN) && node->io_read) {
                node->io_read(node->opaque);
                progress = true;
            }
            if ((revents & G_IO_OUT) && node->io_write) {
                node->io_write(node->opaque);
                progress = true;
            }

            /* if the next select() will return an event, we have progressed */
            if (event == event_notifier_get_handle(&ctx->notifier)) {
                WSANETWORKEVENTS ev;
                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
                if (ev.lNetworkEvents) {
                    progress = true;
                }
            }
        }

        if (node->deleted) {
            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
                QLIST_REMOVE(node, node);
                g_free(node);
                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
            }
        }
    }

    return progress;
}

void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
    qemu_lockcnt_dec(&ctx->list_lock);
    timerlistgroup_run_timers(&ctx->tlg);
}
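
/*
 * Wait for events with WaitForMultipleObjects() and dispatch them.  Only the
 * first iteration may block; ctx->notify_me is raised around the blocking
 * wait so that aio_notify() knows it has to wake us up.
 */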
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
    bool progress, have_select_revents, first;
    int count;
    int timeout;

    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

    /* fill fd sets */
    count = 0;
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

    first = true;

    /* ctx->notifier is always registered.  */
    assert(count > 0);

    /* Multiple iterations, all of them non-blocking except the first,
     * may be necessary to process all pending events.  After the first
     * WaitForMultipleObjects call ctx->notify_me will be decremented.
     */
    do {
        HANDLE event;
        int ret;

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            assert(in_aio_context_home_thread(ctx));
            atomic_sub(&ctx->notify_me, 2);
            aio_notify_accept(ctx);
        }

        if (first) {
            progress |= aio_bh_poll(ctx);
            first = false;
        }

        /* if we have any signaled events, dispatch event */
        event = NULL;
        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
            event = events[ret - WAIT_OBJECT_0];
            events[ret - WAIT_OBJECT_0] = events[--count];
        } else if (!have_select_revents) {
            break;
        }

        have_select_revents = false;
        blocking = false;

        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
}

void aio_context_setup(AioContext *ctx)
{
}

void aio_context_destroy(AioContext *ctx)
{
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    if (max_ns) {
        error_setg(errp, "AioContext polling is not implemented on Windows");
    }
}