/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "trace.h"
#include "aio-posix.h"

/* Stop userspace polling on a handler if it isn't active for some time */
#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)

bool aio_poll_disabled(AioContext *ctx)
{
    return qatomic_read(&ctx->poll_disable_cnt);
}

void aio_add_ready_handler(AioHandlerList *ready_list,
                           AioHandler *node,
                           int revents)
{
    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
    node->pfd.revents = revents;
    QLIST_INSERT_HEAD(ready_list, node, node_ready);
}

static void aio_add_poll_ready_handler(AioHandlerList *ready_list,
                                       AioHandler *node)
{
    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
    node->poll_ready = true;
    QLIST_INSERT_HEAD(ready_list, node, node_ready);
}

static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd) {
            if (!QLIST_IS_INSERTED(node, node_deleted)) {
                return node;
            }
        }
    }

    return NULL;
}

static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /* If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure.  Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    node->pfd.revents = 0;
    node->poll_ready = false;

    /* If the fd monitor has already marked it deleted, leave it alone */
    if (QLIST_IS_INSERTED(node, node_deleted)) {
        return false;
    }

    /* If a read is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
        return false;
    }
    /* Otherwise, delete it for real.  We can't just mark it as
     * deleted because deleted nodes are only cleaned up while
     * no one is walking the handlers list.
     */
    QLIST_SAFE_REMOVE(node, node_poll);
    QLIST_REMOVE(node, node);
    return true;
}

void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        IOHandler *io_poll_ready,
                        void *opaque)
{
    AioHandler *node;
    AioHandler *new_node = NULL;
    bool is_new = false;
    bool deleted = false;
    int poll_disable_change;

    if (io_poll && !io_poll_ready) {
        io_poll = NULL; /* polling only makes sense if there is a handler */
    }

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
        /* Clean events in order to unregister fd from the ctx epoll. */
        node->pfd.events = 0;

        poll_disable_change = -!node->io_poll;
    } else {
        poll_disable_change = !io_poll - (node && !node->io_poll);
        if (node == NULL) {
            is_new = true;
        }
        /* Alloc and insert if it's not already there */
        new_node = g_new0(AioHandler, 1);

        /* Update handler with latest information */
        new_node->io_read = io_read;
        new_node->io_write = io_write;
        new_node->io_poll = io_poll;
        new_node->io_poll_ready = io_poll_ready;
        new_node->opaque = opaque;
        new_node->is_external = is_external;

        if (is_new) {
            new_node->pfd.fd = fd;
        } else {
            new_node->pfd = node->pfd;
        }
        g_source_add_poll(&ctx->source, &new_node->pfd);

        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
     * iterated polling when the system call will be ultimately necessary.
     * Changing handlers is a rare event, and a little wasted polling until
     * the aio_notify below is not an issue.
     */
    qatomic_set(&ctx->poll_disable_cnt,
                qatomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    ctx->fdmon_ops->update(ctx, node, new_node);
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    if (deleted) {
        g_free(node);
    }
}
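
/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * aio_set_fd_handler() above.  MyState, my_read_handler() and s->fd are
 * hypothetical names used only for illustration.
 *
 *     static void my_read_handler(void *opaque)
 *     {
 *         MyState *s = opaque;
 *         ... read from s->fd and process the data ...
 *     }
 *
 *     aio_set_fd_handler(ctx, s->fd, true, my_read_handler,
 *                        NULL, NULL, NULL, s);
 *
 * Passing NULL for io_read, io_write and io_poll removes the handler:
 *
 *     aio_set_fd_handler(ctx, s->fd, true, NULL, NULL, NULL, NULL, NULL);
 */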

static void aio_set_fd_poll(AioContext *ctx, int fd,
                            IOHandler *io_poll_begin,
                            IOHandler *io_poll_end)
{
    AioHandler *node = find_aio_handler(ctx, fd);

    if (!node) {
        return;
    }

    node->io_poll_begin = io_poll_begin;
    node->io_poll_end = io_poll_end;
}

void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll,
                            EventNotifierHandler *io_poll_ready)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
                       (IOHandler *)io_read, NULL, io_poll,
                       (IOHandler *)io_poll_ready, notifier);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
                    (IOHandler *)io_poll_begin,
                    (IOHandler *)io_poll_end);
}
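
/*
 * Summary of the polling handshake driven by poll_set_started() below: when
 * the event loop enters poll mode, each polled handler's io_poll_begin()
 * callback runs; while in poll mode, io_poll() is invoked repeatedly and a
 * true return value marks the handler ready; on leaving poll mode,
 * io_poll_end() runs and io_poll() is checked one final time to catch an
 * event that raced with io_poll_end().
 */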

static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list,
                             bool started)
{
    AioHandler *node;
    bool progress = false;

    if (started == ctx->poll_started) {
        return false;
    }

    ctx->poll_started = started;

    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
        IOHandler *fn;

        if (QLIST_IS_INSERTED(node, node_deleted)) {
            continue;
        }

        if (started) {
            fn = node->io_poll_begin;
        } else {
            fn = node->io_poll_end;
        }

        if (fn) {
            fn(node->opaque);
        }

        /* Poll one last time in case ->io_poll_end() raced with the event */
        if (!started && node->io_poll(node->opaque)) {
            aio_add_poll_ready_handler(ready_list, node);
            progress = true;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return progress;
}

bool aio_prepare(AioContext *ctx)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);

    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, &ready_list, false);
    /* TODO what to do with this list? */

    return false;
}

bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;

        /* TODO should this check poll ready? */
        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return result;
}

static void aio_free_deleted_handlers(AioContext *ctx)
{
    AioHandler *node;

    if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
        return;
    }
    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        return; /* we are nested, let the parent do the freeing */
    }

    while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
        QLIST_REMOVE(node, node);
        QLIST_REMOVE(node, node_deleted);
        QLIST_SAFE_REMOVE(node, node_poll);
        g_free(node);
    }

    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
}

static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
{
    bool progress = false;
    bool poll_ready;
    int revents;

    revents = node->pfd.revents & node->pfd.events;
    node->pfd.revents = 0;

    poll_ready = node->poll_ready;
    node->poll_ready = false;

    /*
     * Start polling AioHandlers when they become ready because activity is
     * likely to continue.  Note that starvation is theoretically possible when
     * fdmon_supports_polling(), but only until the fd fires for the first
     * time.
     */
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        !QLIST_IS_INSERTED(node, node_poll) &&
        node->io_poll) {
        trace_poll_add(ctx, node, node->pfd.fd, revents);
        if (ctx->poll_started && node->io_poll_begin) {
            node->io_poll_begin(node->opaque);
        }
        QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        poll_ready && revents == 0 &&
        aio_node_check(ctx, node->is_external) &&
        node->io_poll_ready) {
        node->io_poll_ready(node->opaque);

        /*
         * Return early since revents was zero.  aio_notify() does not count as
         * progress.
         */
        return node->opaque != &ctx->notifier;
    }

    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_read) {
        node->io_read(node->opaque);

        /* aio_notify() does not count as progress */
        if (node->opaque != &ctx->notifier) {
            progress = true;
        }
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_OUT | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_write) {
        node->io_write(node->opaque);
        progress = true;
    }

    return progress;
}
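
/*
 * Note on aio_dispatch_handler() above: a handler woken by successful
 * userspace polling (poll_ready set, revents == 0) gets only its
 * io_poll_ready() callback and returns early; otherwise io_read() and/or
 * io_write() run according to revents.  Activity on ctx->notifier, the
 * event notifier backing aio_notify(), deliberately never counts as
 * progress.
 */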

/*
 * If we have a list of ready handlers then this is more efficient than
 * scanning all handlers with aio_dispatch_handlers().
 */
static bool aio_dispatch_ready_handlers(AioContext *ctx,
                                        AioHandlerList *ready_list)
{
    bool progress = false;
    AioHandler *node;

    while ((node = QLIST_FIRST(ready_list))) {
        QLIST_REMOVE(node, node_ready);
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}

/* Slower than aio_dispatch_ready_handlers() but only used via glib */
static bool aio_dispatch_handlers(AioContext *ctx)
{
    AioHandler *node, *tmp;
    bool progress = false;

    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}

void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    aio_free_deleted_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    timerlistgroup_run_timers(&ctx->tlg);
}

static bool run_poll_handlers_once(AioContext *ctx,
                                   AioHandlerList *ready_list,
                                   int64_t now,
                                   int64_t *timeout)
{
    bool progress = false;
    AioHandler *node;
    AioHandler *tmp;

    QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
        if (aio_node_check(ctx, node->is_external) &&
            node->io_poll(node->opaque)) {
            aio_add_poll_ready_handler(ready_list, node);

            node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;

            /*
             * Polling was successful, exit try_poll_mode immediately
             * to adjust the next polling time.
             */
            *timeout = 0;
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }

        /* Caller handles freeing deleted nodes.  Don't do it here. */
    }

    return progress;
}

static bool fdmon_supports_polling(AioContext *ctx)
{
    return ctx->fdmon_ops->need_wait != aio_poll_disabled;
}

static bool remove_idle_poll_handlers(AioContext *ctx,
                                      AioHandlerList *ready_list,
                                      int64_t now)
{
    AioHandler *node;
    AioHandler *tmp;
    bool progress = false;

    /*
     * File descriptor monitoring implementations without userspace polling
     * support suffer from starvation when a subset of handlers is polled
     * because fds will not be processed in a timely fashion.  Don't remove
     * idle poll handlers.
     */
    if (!fdmon_supports_polling(ctx)) {
        return false;
    }

    QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
        if (node->poll_idle_timeout == 0LL) {
            node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
        } else if (now >= node->poll_idle_timeout) {
            trace_poll_remove(ctx, node, node->pfd.fd);
            node->poll_idle_timeout = 0LL;
            QLIST_SAFE_REMOVE(node, node_poll);
            if (ctx->poll_started && node->io_poll_end) {
                node->io_poll_end(node->opaque);

                /*
                 * Final poll in case ->io_poll_end() races with an event.
                 * Never mind about re-adding the handler in the rare case
                 * where this causes progress.
                 */
                if (node->io_poll(node->opaque)) {
                    aio_add_poll_ready_handler(ready_list, node);
                    progress = true;
                }
            }
        }
    }

    return progress;
}
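
/*
 * Lifecycle summary: a handler whose io_poll() has not succeeded within
 * POLL_IDLE_INTERVAL_NS (7 seconds) is dropped from ctx->poll_aio_handlers
 * by remove_idle_poll_handlers() above; aio_dispatch_handler() re-adds it
 * the next time its fd actually becomes ready.
 */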

/* run_poll_handlers:
 * @ctx: the AioContext
 * @ready_list: the list to place ready handlers on
 * @max_ns: maximum time to poll for, in nanoseconds
 *
 * Polls for a given time.
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
                              int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time, elapsed_time;

    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);

    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);

    /*
     * Optimization: ->io_poll() handlers often contain RCU read critical
     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
     * -> rcu_read_lock() -> ... sequences with expensive memory
     * synchronization primitives.  Make the entire polling loop an RCU
     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
     * are cheap.
     */
    RCU_READ_LOCK_GUARD();

    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    do {
        progress = run_poll_handlers_once(ctx, ready_list,
                                          start_time, timeout);
        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
        max_ns = qemu_soonest_timeout(*timeout, max_ns);
        assert(!(max_ns && progress));
    } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));

    if (remove_idle_poll_handlers(ctx, ready_list,
                                  start_time + elapsed_time)) {
        *timeout = 0;
        progress = true;
    }

    /* If time has passed with no successful polling, adjust *timeout to
     * keep the same ending time.
     */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }

    trace_run_poll_handlers_end(ctx, progress, *timeout);

    return progress;
}

/* try_poll_mode:
 * @ctx: the AioContext
 * @ready_list: list to add handlers that need to be run
 * @timeout: timeout for blocking wait, computed by the caller and updated if
 *           polling succeeds.
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
                          int64_t *timeout)
{
    int64_t max_ns;

    if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
        return false;
    }

    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
    if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
        /*
         * Enable poll mode.  It pairs with the poll_set_started() in
         * aio_poll() which disables poll mode.
         */
        poll_set_started(ctx, ready_list, true);

        if (run_poll_handlers(ctx, ready_list, max_ns, timeout)) {
            return true;
        }
    }

    return false;
}
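
/*
 * Overview of aio_poll() below:
 *
 * 1. try_poll_mode(): busy-wait on the io_poll() callbacks for up to
 *    ctx->poll_ns nanoseconds in the hope of skipping the system call.
 * 2. ctx->fdmon_ops->wait(): block in the active fd monitoring backend
 *    (poll/epoll/io_uring) if polling did not succeed.
 * 3. Adjust ctx->poll_ns based on how long the blocking wait took.
 * 4. Dispatch bottom halves, ready fd handlers and expired timers.
 */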

bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
    bool progress;
    bool use_notify_me;
    int64_t timeout;
    int64_t start = 0;

    /*
     * There cannot be two concurrent aio_poll calls for the same AioContext (or
     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
     *
     * aio_poll() may only be called in the AioContext's thread.  iohandler_ctx
     * is special in that it runs in the main thread, but that thread's context
     * is qemu_aio_context.
     */
    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
                                      qemu_get_aio_context() : ctx));

    qemu_lockcnt_inc(&ctx->list_lock);

    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    progress = try_poll_mode(ctx, &ready_list, &timeout);
    assert(!(timeout && progress));

    /*
     * aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    use_notify_me = timeout != 0;
    if (use_notify_me) {
        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
        /*
         * Write ctx->notify_me before reading ctx->notified.  Pairs with
         * smp_mb in aio_notify().
         */
        smp_mb();

        /* Don't block if aio_notify() was called */
        if (qatomic_read(&ctx->notified)) {
            timeout = 0;
        }
    }

    /* If polling is allowed, non-blocking aio_poll does not need the
     * system call---a single round of run_poll_handlers_once suffices.
     */
    if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
        /*
         * Disable poll mode.  Poll mode should be disabled before the call
         * to ctx->fdmon_ops->wait() so that a guest notification can wake
         * up I/O threads when some work becomes pending.  This is essential
         * to avoid hangs or unnecessary latency.
         */
        if (poll_set_started(ctx, &ready_list, false)) {
            timeout = 0;
            progress = true;
        }

        ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
    }

    if (use_notify_me) {
        /* Finish the poll before clearing the flag.  */
        qatomic_store_release(&ctx->notify_me,
                              qatomic_read(&ctx->notify_me) - 2);
    }

    aio_notify_accept(ctx);
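
    /*
     * Self-tuning summary for the block below: if the wait blocked longer
     * than poll_max_ns, polling is futile and poll_ns shrinks (divided by
     * poll_shrink, or reset to 0 when poll_shrink is unset); if the wait was
     * short, poll_ns grows (multiplied by poll_grow, default factor 2,
     * starting from 4 microseconds) up to the poll_max_ns ceiling.
     */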
    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            if (grow == 0) {
                grow = 2;
            }

            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    progress |= aio_bh_poll(ctx);
    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);

    aio_free_deleted_handlers(ctx);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

void aio_context_setup(AioContext *ctx)
{
    ctx->fdmon_ops = &fdmon_poll_ops;
    ctx->epollfd = -1;

    /* Use the fastest fd monitoring implementation if available */
    if (fdmon_io_uring_setup(ctx)) {
        return;
    }

    fdmon_epoll_setup(ctx);
}
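
/*
 * Backend selection above: ctx->fdmon_ops starts out as fdmon_poll_ops;
 * io_uring is preferred when it can be set up, otherwise fdmon_epoll_setup()
 * prepares epoll.  The switch away from the fdmon-poll default happens
 * later, in fdmon code outside this file.
 */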

void aio_context_destroy(AioContext *ctx)
{
    fdmon_io_uring_destroy(ctx);
    fdmon_epoll_disable(ctx);
    aio_free_deleted_handlers(ctx);
}

void aio_context_use_g_source(AioContext *ctx)
{
    /*
     * Disable io_uring when the glib main loop is used because it doesn't
     * support mixed glib/aio_poll() usage.  It relies on aio_poll() being
     * called regularly so that changes to the monitored file descriptors are
     * submitted, otherwise a list of pending fd handlers builds up.
     */
    fdmon_io_uring_destroy(ctx);
    aio_free_deleted_handlers(ctx);
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;

    aio_notify(ctx);
}
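
/*
 * These knobs feed the adaptive polling logic in aio_poll().  In QEMU they
 * are typically exposed as the iothread object's poll-max-ns, poll-grow and
 * poll-shrink properties (and aio-max-batch for the function below), though
 * that mapping lives outside this file.
 */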

void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
                                Error **errp)
{
    /*
     * No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->aio_max_batch = max_batch;

    aio_notify(ctx);
}