async.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760
  1. /*
  2. * Data plane event loop
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2009-2017 QEMU contributors
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "qapi/error.h"
  27. #include "block/aio.h"
  28. #include "block/thread-pool.h"
  29. #include "block/graph-lock.h"
  30. #include "qemu/main-loop.h"
  31. #include "qemu/atomic.h"
  32. #include "qemu/rcu_queue.h"
  33. #include "block/raw-aio.h"
  34. #include "qemu/coroutine_int.h"
  35. #include "qemu/coroutine-tls.h"
  36. #include "sysemu/cpu-timers.h"
  37. #include "trace.h"
  38. /***********************************************************/
  39. /* bottom halves (can be seen as timers which expire ASAP) */
  /* Bit values stored in QEMUBH::flags. */
  enum {
      /* The BH is already enqueued and waiting for aio_bh_poll(). */
      BH_PENDING   = 0x01,

      /* The callback should be invoked. */
      BH_SCHEDULED = 0x02,

      /* The BH should be deleted without invoking its callback. */
      BH_DELETED   = 0x04,

      /* The BH should be deleted after its callback has been invoked. */
      BH_ONESHOT   = 0x08,

      /* The BH is scheduled periodically while the event loop is idle. */
      BH_IDLE      = 0x10,
  };
  53. struct QEMUBH {
  54. AioContext *ctx;
  55. const char *name;
  56. QEMUBHFunc *cb;
  57. void *opaque;
  58. QSLIST_ENTRY(QEMUBH) next;
  59. unsigned flags;
  60. MemReentrancyGuard *reentrancy_guard;
  61. };
  62. /* Called concurrently from any thread */
  63. static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
  64. {
  65. AioContext *ctx = bh->ctx;
  66. unsigned old_flags;
  67. /*
  68. * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
  69. * insertion starts after BH_PENDING is set.
  70. */
  71. old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
  72. if (!(old_flags & BH_PENDING)) {
  73. /*
  74. * At this point the bottom half becomes visible to aio_bh_poll().
  75. * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
  76. * aio_bh_poll(), ensuring that:
  77. * 1. any writes needed by the callback are visible from the callback
  78. * after aio_bh_dequeue() returns bh.
  79. * 2. ctx is loaded before the callback has a chance to execute and bh
  80. * could be freed.
  81. */
  82. QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
  83. }
  84. aio_notify(ctx);
  85. if (unlikely(icount_enabled())) {
  86. /*
  87. * Workaround for record/replay.
  88. * vCPU execution should be suspended when new BH is set.
  89. * This is needed to avoid guest timeouts caused
  90. * by the long cycles of the execution.
  91. */
  92. icount_notify_exit();
  93. }
  94. }
  95. /* Only called from aio_bh_poll() and aio_ctx_finalize() */
  96. static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
  97. {
  98. QEMUBH *bh = QSLIST_FIRST_RCU(head);
  99. if (!bh) {
  100. return NULL;
  101. }
  102. QSLIST_REMOVE_HEAD(head, next);
  103. /*
  104. * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
  105. * the removal finishes before BH_PENDING is reset.
  106. */
  107. *flags = qatomic_fetch_and(&bh->flags,
  108. ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
  109. return bh;
  110. }
  111. void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
  112. void *opaque, const char *name)
  113. {
  114. QEMUBH *bh;
  115. bh = g_new(QEMUBH, 1);
  116. *bh = (QEMUBH){
  117. .ctx = ctx,
  118. .cb = cb,
  119. .opaque = opaque,
  120. .name = name,
  121. };
  122. aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
  123. }
  124. QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
  125. const char *name, MemReentrancyGuard *reentrancy_guard)
  126. {
  127. QEMUBH *bh;
  128. bh = g_new(QEMUBH, 1);
  129. *bh = (QEMUBH){
  130. .ctx = ctx,
  131. .cb = cb,
  132. .opaque = opaque,
  133. .name = name,
  134. .reentrancy_guard = reentrancy_guard,
  135. };
  136. return bh;
  137. }
  138. void aio_bh_call(QEMUBH *bh)
  139. {
  140. bool last_engaged_in_io = false;
  141. /* Make a copy of the guard-pointer as cb may free the bh */
  142. MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
  143. if (reentrancy_guard) {
  144. last_engaged_in_io = reentrancy_guard->engaged_in_io;
  145. if (reentrancy_guard->engaged_in_io) {
  146. trace_reentrant_aio(bh->ctx, bh->name);
  147. }
  148. reentrancy_guard->engaged_in_io = true;
  149. }
  150. bh->cb(bh->opaque);
  151. if (reentrancy_guard) {
  152. reentrancy_guard->engaged_in_io = last_engaged_in_io;
  153. }
  154. }
  155. /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
  156. int aio_bh_poll(AioContext *ctx)
  157. {
  158. BHListSlice slice;
  159. BHListSlice *s;
  160. int ret = 0;
  161. /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
  162. QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
  163. /*
  164. * GCC13 [-Werror=dangling-pointer=] complains that the local variable
  165. * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
  166. * list is emptied before this function returns.
  167. */
  168. #if !defined(__clang__)
  169. #pragma GCC diagnostic push
  170. #pragma GCC diagnostic ignored "-Wpragmas"
  171. #pragma GCC diagnostic ignored "-Wdangling-pointer="
  172. #endif
  173. QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
  174. #if !defined(__clang__)
  175. #pragma GCC diagnostic pop
  176. #endif
  177. while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
  178. QEMUBH *bh;
  179. unsigned flags;
  180. bh = aio_bh_dequeue(&s->bh_list, &flags);
  181. if (!bh) {
  182. QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
  183. continue;
  184. }
  185. if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  186. /* Idle BHs don't count as progress */
  187. if (!(flags & BH_IDLE)) {
  188. ret = 1;
  189. }
  190. aio_bh_call(bh);
  191. }
  192. if (flags & (BH_DELETED | BH_ONESHOT)) {
  193. g_free(bh);
  194. }
  195. }
  196. return ret;
  197. }
  198. void qemu_bh_schedule_idle(QEMUBH *bh)
  199. {
  200. aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
  201. }
  202. void qemu_bh_schedule(QEMUBH *bh)
  203. {
  204. aio_bh_enqueue(bh, BH_SCHEDULED);
  205. }
  206. /* This func is async.
  207. */
  208. void qemu_bh_cancel(QEMUBH *bh)
  209. {
  210. qatomic_and(&bh->flags, ~BH_SCHEDULED);
  211. }
  212. /* This func is async.The bottom half will do the delete action at the finial
  213. * end.
  214. */
  215. void qemu_bh_delete(QEMUBH *bh)
  216. {
  217. aio_bh_enqueue(bh, BH_DELETED);
  218. }
  219. static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
  220. {
  221. QEMUBH *bh;
  222. QSLIST_FOREACH_RCU(bh, head, next) {
  223. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  224. if (bh->flags & BH_IDLE) {
  225. /* idle bottom halves will be polled at least
  226. * every 10ms */
  227. timeout = 10000000;
  228. } else {
  229. /* non-idle bottom halves will be executed
  230. * immediately */
  231. return 0;
  232. }
  233. }
  234. }
  235. return timeout;
  236. }
  237. int64_t
  238. aio_compute_timeout(AioContext *ctx)
  239. {
  240. BHListSlice *s;
  241. int64_t deadline;
  242. int timeout = -1;
  243. timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
  244. if (timeout == 0) {
  245. return 0;
  246. }
  247. QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
  248. timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
  249. if (timeout == 0) {
  250. return 0;
  251. }
  252. }
  253. deadline = timerlistgroup_deadline_ns(&ctx->tlg);
  254. if (deadline == 0) {
  255. return 0;
  256. } else {
  257. return qemu_soonest_timeout(timeout, deadline);
  258. }
  259. }
  260. static gboolean
  261. aio_ctx_prepare(GSource *source, gint *timeout)
  262. {
  263. AioContext *ctx = (AioContext *) source;
  264. qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) | 1);
  265. /*
  266. * Write ctx->notify_me before computing the timeout
  267. * (reading bottom half flags, etc.). Pairs with
  268. * smp_mb in aio_notify().
  269. */
  270. smp_mb();
  271. /* We assume there is no timeout already supplied */
  272. *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
  273. if (aio_prepare(ctx)) {
  274. *timeout = 0;
  275. }
  276. return *timeout == 0;
  277. }
  278. static gboolean
  279. aio_ctx_check(GSource *source)
  280. {
  281. AioContext *ctx = (AioContext *) source;
  282. QEMUBH *bh;
  283. BHListSlice *s;
  284. /* Finish computing the timeout before clearing the flag. */
  285. qatomic_store_release(&ctx->notify_me, qatomic_read(&ctx->notify_me) & ~1);
  286. aio_notify_accept(ctx);
  287. QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
  288. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  289. return true;
  290. }
  291. }
  292. QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
  293. QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
  294. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  295. return true;
  296. }
  297. }
  298. }
  299. return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
  300. }
  301. static gboolean
  302. aio_ctx_dispatch(GSource *source,
  303. GSourceFunc callback,
  304. gpointer user_data)
  305. {
  306. AioContext *ctx = (AioContext *) source;
  307. assert(callback == NULL);
  308. aio_dispatch(ctx);
  309. return true;
  310. }
  311. static void
  312. aio_ctx_finalize(GSource *source)
  313. {
  314. AioContext *ctx = (AioContext *) source;
  315. QEMUBH *bh;
  316. unsigned flags;
  317. thread_pool_free(ctx->thread_pool);
  318. #ifdef CONFIG_LINUX_AIO
  319. if (ctx->linux_aio) {
  320. laio_detach_aio_context(ctx->linux_aio, ctx);
  321. laio_cleanup(ctx->linux_aio);
  322. ctx->linux_aio = NULL;
  323. }
  324. #endif
  325. #ifdef CONFIG_LINUX_IO_URING
  326. if (ctx->linux_io_uring) {
  327. luring_detach_aio_context(ctx->linux_io_uring, ctx);
  328. luring_cleanup(ctx->linux_io_uring);
  329. ctx->linux_io_uring = NULL;
  330. }
  331. #endif
  332. assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
  333. qemu_bh_delete(ctx->co_schedule_bh);
  334. /* There must be no aio_bh_poll() calls going on */
  335. assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
  336. while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
  337. /*
  338. * qemu_bh_delete() must have been called on BHs in this AioContext. In
  339. * many cases memory leaks, hangs, or inconsistent state occur when a
  340. * BH is leaked because something still expects it to run.
  341. *
  342. * If you hit this, fix the lifecycle of the BH so that
  343. * qemu_bh_delete() and any associated cleanup is called before the
  344. * AioContext is finalized.
  345. */
  346. if (unlikely(!(flags & BH_DELETED))) {
  347. fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
  348. __func__, bh->name);
  349. abort();
  350. }
  351. g_free(bh);
  352. }
  353. aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
  354. event_notifier_cleanup(&ctx->notifier);
  355. qemu_rec_mutex_destroy(&ctx->lock);
  356. qemu_lockcnt_destroy(&ctx->list_lock);
  357. timerlistgroup_deinit(&ctx->tlg);
  358. unregister_aiocontext(ctx);
  359. aio_context_destroy(ctx);
  360. }
  361. static GSourceFuncs aio_source_funcs = {
  362. aio_ctx_prepare,
  363. aio_ctx_check,
  364. aio_ctx_dispatch,
  365. aio_ctx_finalize
  366. };
  367. GSource *aio_get_g_source(AioContext *ctx)
  368. {
  369. aio_context_use_g_source(ctx);
  370. g_source_ref(&ctx->source);
  371. return &ctx->source;
  372. }
  373. ThreadPool *aio_get_thread_pool(AioContext *ctx)
  374. {
  375. if (!ctx->thread_pool) {
  376. ctx->thread_pool = thread_pool_new(ctx);
  377. }
  378. return ctx->thread_pool;
  379. }
  380. #ifdef CONFIG_LINUX_AIO
  381. LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
  382. {
  383. if (!ctx->linux_aio) {
  384. ctx->linux_aio = laio_init(errp);
  385. if (ctx->linux_aio) {
  386. laio_attach_aio_context(ctx->linux_aio, ctx);
  387. }
  388. }
  389. return ctx->linux_aio;
  390. }
  391. LinuxAioState *aio_get_linux_aio(AioContext *ctx)
  392. {
  393. assert(ctx->linux_aio);
  394. return ctx->linux_aio;
  395. }
  396. #endif
  397. #ifdef CONFIG_LINUX_IO_URING
  398. LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
  399. {
  400. if (ctx->linux_io_uring) {
  401. return ctx->linux_io_uring;
  402. }
  403. ctx->linux_io_uring = luring_init(errp);
  404. if (!ctx->linux_io_uring) {
  405. return NULL;
  406. }
  407. luring_attach_aio_context(ctx->linux_io_uring, ctx);
  408. return ctx->linux_io_uring;
  409. }
  410. LuringState *aio_get_linux_io_uring(AioContext *ctx)
  411. {
  412. assert(ctx->linux_io_uring);
  413. return ctx->linux_io_uring;
  414. }
  415. #endif
  416. void aio_notify(AioContext *ctx)
  417. {
  418. /*
  419. * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
  420. * smp_mb() in aio_notify_accept().
  421. */
  422. smp_wmb();
  423. qatomic_set(&ctx->notified, true);
  424. /*
  425. * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
  426. * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
  427. */
  428. smp_mb();
  429. if (qatomic_read(&ctx->notify_me)) {
  430. event_notifier_set(&ctx->notifier);
  431. }
  432. }
  433. void aio_notify_accept(AioContext *ctx)
  434. {
  435. qatomic_set(&ctx->notified, false);
  436. /*
  437. * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
  438. * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
  439. * with smp_wmb() in aio_notify.
  440. */
  441. smp_mb();
  442. }
  443. static void aio_timerlist_notify(void *opaque, QEMUClockType type)
  444. {
  445. aio_notify(opaque);
  446. }
  447. static void aio_context_notifier_cb(EventNotifier *e)
  448. {
  449. AioContext *ctx = container_of(e, AioContext, notifier);
  450. event_notifier_test_and_clear(&ctx->notifier);
  451. }
  452. /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
  453. static bool aio_context_notifier_poll(void *opaque)
  454. {
  455. EventNotifier *e = opaque;
  456. AioContext *ctx = container_of(e, AioContext, notifier);
  457. /*
  458. * No need for load-acquire because we just want to kick the
  459. * event loop. aio_notify_accept() takes care of synchronizing
  460. * the event loop with the producers.
  461. */
  462. return qatomic_read(&ctx->notified);
  463. }
  464. static void aio_context_notifier_poll_ready(EventNotifier *e)
  465. {
  466. /* Do nothing, we just wanted to kick the event loop */
  467. }
  468. static void co_schedule_bh_cb(void *opaque)
  469. {
  470. AioContext *ctx = opaque;
  471. QSLIST_HEAD(, Coroutine) straight, reversed;
  472. QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
  473. QSLIST_INIT(&straight);
  474. while (!QSLIST_EMPTY(&reversed)) {
  475. Coroutine *co = QSLIST_FIRST(&reversed);
  476. QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
  477. QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
  478. }
  479. while (!QSLIST_EMPTY(&straight)) {
  480. Coroutine *co = QSLIST_FIRST(&straight);
  481. QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
  482. trace_aio_co_schedule_bh_cb(ctx, co);
  483. /* Protected by write barrier in qemu_aio_coroutine_enter */
  484. qatomic_set(&co->scheduled, NULL);
  485. qemu_aio_coroutine_enter(ctx, co);
  486. }
  487. }
  488. AioContext *aio_context_new(Error **errp)
  489. {
  490. int ret;
  491. AioContext *ctx;
  492. ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
  493. QSLIST_INIT(&ctx->bh_list);
  494. QSIMPLEQ_INIT(&ctx->bh_slice_list);
  495. aio_context_setup(ctx);
  496. ret = event_notifier_init(&ctx->notifier, false);
  497. if (ret < 0) {
  498. error_setg_errno(errp, -ret, "Failed to initialize event notifier");
  499. goto fail;
  500. }
  501. g_source_set_can_recurse(&ctx->source, true);
  502. qemu_lockcnt_init(&ctx->list_lock);
  503. ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
  504. QSLIST_INIT(&ctx->scheduled_coroutines);
  505. aio_set_event_notifier(ctx, &ctx->notifier,
  506. aio_context_notifier_cb,
  507. aio_context_notifier_poll,
  508. aio_context_notifier_poll_ready);
  509. #ifdef CONFIG_LINUX_AIO
  510. ctx->linux_aio = NULL;
  511. #endif
  512. #ifdef CONFIG_LINUX_IO_URING
  513. ctx->linux_io_uring = NULL;
  514. #endif
  515. ctx->thread_pool = NULL;
  516. qemu_rec_mutex_init(&ctx->lock);
  517. timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
  518. ctx->poll_ns = 0;
  519. ctx->poll_max_ns = 0;
  520. ctx->poll_grow = 0;
  521. ctx->poll_shrink = 0;
  522. ctx->aio_max_batch = 0;
  523. ctx->thread_pool_min = 0;
  524. ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
  525. register_aiocontext(ctx);
  526. return ctx;
  527. fail:
  528. g_source_destroy(&ctx->source);
  529. return NULL;
  530. }
  531. void aio_co_schedule(AioContext *ctx, Coroutine *co)
  532. {
  533. trace_aio_co_schedule(ctx, co);
  534. const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
  535. __func__);
  536. if (scheduled) {
  537. fprintf(stderr,
  538. "%s: Co-routine was already scheduled in '%s'\n",
  539. __func__, scheduled);
  540. abort();
  541. }
  542. /* The coroutine might run and release the last ctx reference before we
  543. * invoke qemu_bh_schedule(). Take a reference to keep ctx alive until
  544. * we're done.
  545. */
  546. aio_context_ref(ctx);
  547. QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
  548. co, co_scheduled_next);
  549. qemu_bh_schedule(ctx->co_schedule_bh);
  550. aio_context_unref(ctx);
  551. }
  552. typedef struct AioCoRescheduleSelf {
  553. Coroutine *co;
  554. AioContext *new_ctx;
  555. } AioCoRescheduleSelf;
  556. static void aio_co_reschedule_self_bh(void *opaque)
  557. {
  558. AioCoRescheduleSelf *data = opaque;
  559. aio_co_schedule(data->new_ctx, data->co);
  560. }
  561. void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
  562. {
  563. AioContext *old_ctx = qemu_get_current_aio_context();
  564. if (old_ctx != new_ctx) {
  565. AioCoRescheduleSelf data = {
  566. .co = qemu_coroutine_self(),
  567. .new_ctx = new_ctx,
  568. };
  569. /*
  570. * We can't directly schedule the coroutine in the target context
  571. * because this would be racy: The other thread could try to enter the
  572. * coroutine before it has yielded in this one.
  573. */
  574. aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
  575. qemu_coroutine_yield();
  576. }
  577. }
  578. void aio_co_wake(Coroutine *co)
  579. {
  580. AioContext *ctx;
  581. /* Read coroutine before co->ctx. Matches smp_wmb in
  582. * qemu_coroutine_enter.
  583. */
  584. smp_read_barrier_depends();
  585. ctx = qatomic_read(&co->ctx);
  586. aio_co_enter(ctx, co);
  587. }
  588. void aio_co_enter(AioContext *ctx, Coroutine *co)
  589. {
  590. if (ctx != qemu_get_current_aio_context()) {
  591. aio_co_schedule(ctx, co);
  592. return;
  593. }
  594. if (qemu_in_coroutine()) {
  595. Coroutine *self = qemu_coroutine_self();
  596. assert(self != co);
  597. QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
  598. } else {
  599. qemu_aio_coroutine_enter(ctx, co);
  600. }
  601. }
  602. void aio_context_ref(AioContext *ctx)
  603. {
  604. g_source_ref(&ctx->source);
  605. }
  606. void aio_context_unref(AioContext *ctx)
  607. {
  608. g_source_unref(&ctx->source);
  609. }
  610. QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
  611. AioContext *qemu_get_current_aio_context(void)
  612. {
  613. AioContext *ctx = get_my_aiocontext();
  614. if (ctx) {
  615. return ctx;
  616. }
  617. if (bql_locked()) {
  618. /* Possibly in a vCPU thread. */
  619. return qemu_get_aio_context();
  620. }
  621. return NULL;
  622. }
  623. void qemu_set_current_aio_context(AioContext *ctx)
  624. {
  625. assert(!get_my_aiocontext());
  626. set_my_aiocontext(ctx);
  627. }
  628. void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
  629. int64_t max, Error **errp)
  630. {
  631. if (min > max || !max || min > INT_MAX || max > INT_MAX) {
  632. error_setg(errp, "bad thread-pool-min/thread-pool-max values");
  633. return;
  634. }
  635. ctx->thread_pool_min = min;
  636. ctx->thread_pool_max = max;
  637. if (ctx->thread_pool) {
  638. thread_pool_update_params(ctx->thread_pool, ctx);
  639. }
  640. }