aio-win32.c

/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paolo Bonzini     <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "qemu/main-loop.h"
#include "qemu/lockcnt.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu/rcu_queue.h"
#include "qemu/error-report.h"
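
/*
 * One registered handler.  A node is either a socket handler installed by
 * aio_set_fd_handler() (io_read/io_write callbacks, pfd.fd holds the
 * SOCKET) or an event-notifier handler installed by aio_set_event_notifier()
 * (io_notify callback, pfd.fd holds the event HANDLE).  Nodes live on
 * ctx->aio_handlers and, once marked as deleted, are reaped lazily.
 */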
struct AioHandler {
    EventNotifier *e;
    IOHandler *io_read;
    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
    void *opaque;
    QLIST_ENTRY(AioHandler) node;
};
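
/*
 * Drop @node from the handler list.  Called with ctx->list_lock locked.
 * If a list walk is in progress (the lockcnt counter is nonzero), the node
 * is only marked as deleted; aio_dispatch_handlers() frees it later, once
 * no walker can still reference it.
 */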
static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /*
     * If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure.  Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    /* If aio_poll is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        node->deleted = 1;
        node->pfd.revents = 0;
    } else {
        /* Otherwise, delete it for real.  We can't just mark it as
         * deleted because deleted nodes are only cleaned up after
         * releasing the list_lock.
         */
        QLIST_REMOVE(node, node);
        g_free(node);
    }
}
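
/*
 * Register, update or remove the read/write handlers for a socket file
 * descriptor.  Only sockets are supported on Windows; the socket is tied
 * to the AioContext's event notifier handle via qemu_socket_select(), so
 * network activity signals that handle and wakes up aio_poll().  Passing
 * NULL for both io_read and io_write removes any existing handler.
 * io_poll and io_poll_ready are ignored: polling is not implemented here.
 */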
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        IOHandler *io_poll_ready,
                        void *opaque)
{
    AioHandler *old_node;
    AioHandler *node = NULL;
    SOCKET s;

    if (!fd_is_socket(fd)) {
        error_report("fd=%d is not a socket, AIO implementation is missing", fd);
        return;
    }

    s = _get_osfhandle(fd);

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(old_node, &ctx->aio_handlers, node) {
        if (old_node->pfd.fd == s && !old_node->deleted) {
            break;
        }
    }

    if (io_read || io_write) {
        HANDLE event;
        long bitmask = 0;

        /* Alloc and insert if it's not already there */
        node = g_new0(AioHandler, 1);
        node->pfd.fd = s;

        node->pfd.events = 0;
        if (node->io_read) {
            node->pfd.events |= G_IO_IN;
        }
        if (node->io_write) {
            node->pfd.events |= G_IO_OUT;
        }

        node->e = &ctx->notifier;

        /* Update handler with latest information */
        node->opaque = opaque;
        node->io_read = io_read;
        node->io_write = io_write;

        if (io_read) {
            bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
        }

        if (io_write) {
            bitmask |= FD_WRITE | FD_CONNECT;
        }

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
        event = event_notifier_get_handle(&ctx->notifier);
        qemu_socket_select(fd, event, bitmask, NULL);
    }
    if (old_node) {
        aio_remove_fd_handler(ctx, old_node);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}
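
/*
 * Register or remove (when @io_notify is NULL) a handler that runs when
 * @e's Windows event handle is signaled.  The handle is also added to the
 * AioContext's GSource so that the glib main loop polls it.  io_poll and
 * io_poll_ready are ignored on Windows.
 */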
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            EventNotifierHandler *io_notify,
                            AioPollFn *io_poll,
                            EventNotifierHandler *io_poll_ready)
{
    AioHandler *node;

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the fd handler? */
    if (!io_notify) {
        if (node) {
            aio_remove_fd_handler(ctx, node);
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_notify = io_notify;
    }

    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    /* Not implemented */
}
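
/*
 * Pre-compute socket readiness before blocking.  A zero-timeout select()
 * over all registered sockets records G_IO_IN/G_IO_OUT in each handler's
 * pfd.revents; WaitForMultipleObjects() can only report that the
 * notifier's event handle fired, not which socket is ready, so these
 * revents tell aio_pending()/aio_dispatch_handlers() what to service.
 * Returns true if any socket had a pending event.
 */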
bool aio_prepare(AioContext *ctx)
{
    static struct timeval tv0;
    AioHandler *node;
    bool have_select_revents = false;
    fd_set rfds, wfds;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
        if (node->io_write) {
            FD_SET ((SOCKET)node->pfd.fd, &wfds);
        }
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
                have_select_revents = true;
            }

            if (FD_ISSET(node->pfd.fd, &wfds)) {
                node->pfd.revents |= G_IO_OUT;
                have_select_revents = true;
            }
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
}
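
/*
 * Return true if a subsequent dispatch would invoke at least one callback,
 * i.e. some handler has revents (recorded by aio_prepare() or the GSource
 * poll) that match one of its registered callbacks.
 */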
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
            result = true;
            break;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
            result = true;
            break;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
            result = true;
            break;
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return result;
}
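
/*
 * Run the callbacks of every handler whose event handle matches @event or
 * whose pfd.revents were recorded earlier, and reap nodes that were marked
 * as deleted.  Returns true if a callback made progress; the context's own
 * aio_notify() notifier does not count.
 */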
static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
{
    AioHandler *node;
    bool progress = false;
    AioHandler *tmp;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents = node->pfd.revents;

        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

            /* aio_notify() does not count as progress */
            if (node->e != &ctx->notifier) {
                progress = true;
            }
        }

        if (!node->deleted &&
            (node->io_read || node->io_write)) {
            node->pfd.revents = 0;
            if ((revents & G_IO_IN) && node->io_read) {
                node->io_read(node->opaque);
                progress = true;
            }
            if ((revents & G_IO_OUT) && node->io_write) {
                node->io_write(node->opaque);
                progress = true;
            }

            /* if the next select() will return an event, we have progressed */
            if (event == event_notifier_get_handle(&ctx->notifier)) {
                WSANETWORKEVENTS ev;
                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
                if (ev.lNetworkEvents) {
                    progress = true;
                }
            }
        }

        if (node->deleted) {
            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
                QLIST_REMOVE(node, node);
                g_free(node);
                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
            }
        }
    }

    return progress;
}
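
/* GSource dispatch path: run bottom halves, ready handlers and timers. */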
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
    qemu_lockcnt_dec(&ctx->list_lock);
    timerlistgroup_run_timers(&ctx->tlg);
}
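
/*
 * Wait for events and dispatch their handlers.  The wait is a
 * WaitForMultipleObjects() over the event handles of all registered
 * notifiers; one signaled handle is dispatched per loop iteration, and
 * every iteration after the first is non-blocking.
 */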
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    HANDLE events[MAXIMUM_WAIT_OBJECTS];
    bool progress, have_select_revents, first;
    unsigned count;
    int timeout;

    /*
     * There cannot be two concurrent aio_poll calls for the same AioContext (or
     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
     *
     * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
     * is special in that it runs in the main thread, but that thread's context
     * is qemu_aio_context.
     */
    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
                                      qemu_get_aio_context() : ctx));
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
        /*
         * Write ctx->notify_me before computing the timeout
         * (reading bottom half flags, etc.).  Pairs with
         * smp_mb in aio_notify().
         */
        smp_mb();
    }

    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

    /* fill fd sets */
    count = 0;
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify) {
            assert(count < MAXIMUM_WAIT_OBJECTS);
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

    first = true;

    /* ctx->notifier is always registered.  */
    assert(count > 0);

    /* Multiple iterations, all of them non-blocking except the first,
     * may be necessary to process all pending events.  After the first
     * WaitForMultipleObjects call ctx->notify_me will be decremented.
     */
    do {
        HANDLE event;
        int ret;

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            qatomic_store_release(&ctx->notify_me,
                                  qatomic_read(&ctx->notify_me) - 2);
            aio_notify_accept(ctx);
        }

        if (first) {
            progress |= aio_bh_poll(ctx);
            first = false;
        }

        /* if we have any signaled events, dispatch event */
        event = NULL;
        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
            event = events[ret - WAIT_OBJECT_0];
            events[ret - WAIT_OBJECT_0] = events[--count];
        } else if (!have_select_revents) {
            break;
        }

        have_select_revents = false;
        blocking = false;

        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
}
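
/* Adaptive polling and per-context tuning are not available on Windows,
 * so the remaining hooks are stubs; aio_context_set_poll_params() reports
 * an error if polling is requested.
 */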
void aio_context_setup(AioContext *ctx)
{
}

void aio_context_destroy(AioContext *ctx)
{
}

void aio_context_use_g_source(AioContext *ctx)
{
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    if (max_ns) {
        error_setg(errp, "AioContext polling is not implemented on Windows");
    }
}

void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch)
{
}