/*
 * QEMU aio implementation
 *
 * Copyright IBM Corp., 2008
 * Copyright Red Hat Inc., 2012
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *  Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "qemu/main-loop.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
#include "qapi/error.h"
#include "qemu/rcu_queue.h"
#include "qemu/error-report.h"
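
/*
 * Each AioHandler tracks one registered event source: a socket with
 * io_read/io_write callbacks (its readiness is routed through the
 * context's EventNotifier), or an EventNotifier with an io_notify
 * callback.
 */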
struct AioHandler {
    EventNotifier *e;
    IOHandler *io_read;
    IOHandler *io_write;
    EventNotifierHandler *io_notify;
    GPollFD pfd;
    int deleted;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
};

static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /*
     * If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure.  Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    /* If aio_poll is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        node->deleted = 1;
        node->pfd.revents = 0;
    } else {
        /* Otherwise, delete it for real.  We can't just mark it as
         * deleted because deleted nodes are only cleaned up after
         * releasing the list_lock.
         */
        QLIST_REMOVE(node, node);
        g_free(node);
    }
}
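
/*
 * Register, update or remove the read/write handlers for a socket file
 * descriptor.  The fd must wrap a socket; its network events are
 * associated with the context's notifier handle via qemu_socket_select(),
 * so a ready socket wakes up WaitForMultipleObjects() in aio_poll().
 */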
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        IOHandler *io_poll_ready,
                        void *opaque)
{
    AioHandler *old_node;
    AioHandler *node = NULL;
    SOCKET s;

    if (!fd_is_socket(fd)) {
        error_report("fd=%d is not a socket, AIO implementation is missing", fd);
        return;
    }

    s = _get_osfhandle(fd);

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(old_node, &ctx->aio_handlers, node) {
        if (old_node->pfd.fd == s && !old_node->deleted) {
            break;
        }
    }

    if (io_read || io_write) {
        HANDLE event;
        long bitmask = 0;

        /* Alloc and insert if it's not already there */
        node = g_new0(AioHandler, 1);
        node->pfd.fd = s;

        node->pfd.events = 0;
        if (node->io_read) {
            node->pfd.events |= G_IO_IN;
        }
        if (node->io_write) {
            node->pfd.events |= G_IO_OUT;
        }

        node->e = &ctx->notifier;

        /* Update handler with latest information */
        node->opaque = opaque;
        node->io_read = io_read;
        node->io_write = io_write;
        node->is_external = is_external;

        if (io_read) {
            bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
        }

        if (io_write) {
            bitmask |= FD_WRITE | FD_CONNECT;
        }

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
        event = event_notifier_get_handle(&ctx->notifier);
        qemu_socket_select(fd, event, bitmask, NULL);
    }
    if (old_node) {
        aio_remove_fd_handler(ctx, old_node);
    }

    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}
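
/*
 * Register or remove an EventNotifier handler.  The notifier's handle is
 * added to the AioContext's GSource so the glib main loop can poll it as
 * well.
 */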
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *e,
                            bool is_external,
                            EventNotifierHandler *io_notify,
                            AioPollFn *io_poll,
                            EventNotifierHandler *io_poll_ready)
{
    AioHandler *node;

    qemu_lockcnt_lock(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->e == e && !node->deleted) {
            break;
        }
    }

    /* Are we deleting the fd handler? */
    if (!io_notify) {
        if (node) {
            aio_remove_fd_handler(ctx, node);
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->e = e;
            node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
            node->pfd.events = G_IO_IN;
            node->is_external = is_external;
            QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_notify = io_notify;
    }

    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    /* Not implemented */
}
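
/*
 * Check socket readiness with a zero-timeout select() and record the
 * results in each handler's pfd.revents.  Returns true if any socket
 * handler has something to dispatch.
 */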
bool aio_prepare(AioContext *ctx)
{
    static struct timeval tv0;
    AioHandler *node;
    bool have_select_revents = false;
    fd_set rfds, wfds;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    /* fill fd sets */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->io_read) {
            FD_SET ((SOCKET)node->pfd.fd, &rfds);
        }
        if (node->io_write) {
            FD_SET ((SOCKET)node->pfd.fd, &wfds);
        }
    }

    if (select(0, &rfds, &wfds, NULL, &tv0) > 0) {
        QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
            node->pfd.revents = 0;
            if (FD_ISSET(node->pfd.fd, &rfds)) {
                node->pfd.revents |= G_IO_IN;
                have_select_revents = true;
            }

            if (FD_ISSET(node->pfd.fd, &wfds)) {
                node->pfd.revents |= G_IO_OUT;
                have_select_revents = true;
            }
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return have_select_revents;
}
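
/* Return true if any registered handler has an event waiting to be dispatched. */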
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (node->pfd.revents && node->io_notify) {
            result = true;
            break;
        }

        if ((node->pfd.revents & G_IO_IN) && node->io_read) {
            result = true;
            break;
        }
        if ((node->pfd.revents & G_IO_OUT) && node->io_write) {
            result = true;
            break;
        }
    }

    qemu_lockcnt_dec(&ctx->list_lock);
    return result;
}
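
/*
 * Invoke the callbacks of ready handlers: io_notify for the event notifier
 * whose handle was signalled, and io_read/io_write for sockets whose
 * revents were filled in by aio_prepare().  Deleted nodes are freed here
 * once the list_lock allows it.
 */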
static bool aio_dispatch_handlers(AioContext *ctx, HANDLE event)
{
    AioHandler *node;
    bool progress = false;
    AioHandler *tmp;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents = node->pfd.revents;

        if (!node->deleted &&
            (revents || event_notifier_get_handle(node->e) == event) &&
            node->io_notify) {
            node->pfd.revents = 0;
            node->io_notify(node->e);

            /* aio_notify() does not count as progress */
            if (node->e != &ctx->notifier) {
                progress = true;
            }
        }

        if (!node->deleted &&
            (node->io_read || node->io_write)) {
            node->pfd.revents = 0;
            if ((revents & G_IO_IN) && node->io_read) {
                node->io_read(node->opaque);
                progress = true;
            }
            if ((revents & G_IO_OUT) && node->io_write) {
                node->io_write(node->opaque);
                progress = true;
            }

            /* if the next select() will return an event, we have progressed */
            if (event == event_notifier_get_handle(&ctx->notifier)) {
                WSANETWORKEVENTS ev;
                WSAEnumNetworkEvents(node->pfd.fd, event, &ev);
                if (ev.lNetworkEvents) {
                    progress = true;
                }
            }
        }

        if (node->deleted) {
            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
                QLIST_REMOVE(node, node);
                g_free(node);
                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
            }
        }
    }

    return progress;
}
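
/* Run bottom halves, ready handlers and expired timers once, without blocking. */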
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx, INVALID_HANDLE_VALUE);
    qemu_lockcnt_dec(&ctx->list_lock);
    timerlistgroup_run_timers(&ctx->tlg);
}
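
/*
 * Wait for events (blocking at most once, and only if @blocking is true)
 * with WaitForMultipleObjects(), then dispatch one signalled handle per
 * loop iteration until nothing is left to process.
 */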
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    HANDLE events[MAXIMUM_WAIT_OBJECTS];
    bool progress, have_select_revents, first;
    unsigned count;
    int timeout;

    /*
     * There cannot be two concurrent aio_poll calls for the same AioContext (or
     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
     *
     * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
     * is special in that it runs in the main thread, but that thread's context
     * is qemu_aio_context.
     */
    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
                                      qemu_get_aio_context() : ctx));
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
        /*
         * Write ctx->notify_me before computing the timeout
         * (reading bottom half flags, etc.).  Pairs with
         * smp_mb in aio_notify().
         */
        smp_mb();
    }

    qemu_lockcnt_inc(&ctx->list_lock);
    have_select_revents = aio_prepare(ctx);

    /* fill fd sets */
    count = 0;
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_notify
            && aio_node_check(ctx, node->is_external)) {
            assert(count < MAXIMUM_WAIT_OBJECTS);
            events[count++] = event_notifier_get_handle(node->e);
        }
    }

    first = true;

    /* ctx->notifier is always registered.  */
    assert(count > 0);

    /* Multiple iterations, all of them non-blocking except the first,
     * may be necessary to process all pending events.  After the first
     * WaitForMultipleObjects call ctx->notify_me will be decremented.
     */
    do {
        HANDLE event;
        int ret;

        timeout = blocking && !have_select_revents
            ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
            qatomic_store_release(&ctx->notify_me,
                                  qatomic_read(&ctx->notify_me) - 2);
            aio_notify_accept(ctx);
        }

        if (first) {
            progress |= aio_bh_poll(ctx);
            first = false;
        }

        /* if we have any signaled events, dispatch event */
        event = NULL;
        if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
            event = events[ret - WAIT_OBJECT_0];
            events[ret - WAIT_OBJECT_0] = events[--count];
        } else if (!have_select_revents) {
            break;
        }

        have_select_revents = false;
        blocking = false;

        progress |= aio_dispatch_handlers(ctx, event);
    } while (count > 0);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);
    return progress;
}

void aio_context_setup(AioContext *ctx)
{
}

void aio_context_destroy(AioContext *ctx)
{
}

void aio_context_use_g_source(AioContext *ctx)
{
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    if (max_ns) {
        error_setg(errp, "AioContext polling is not implemented on Windows");
    }
}

void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
                                Error **errp)
{
}