async.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768
  1. /*
  2. * Data plane event loop
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2009-2017 QEMU contributors
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "qapi/error.h"
  27. #include "block/aio.h"
  28. #include "block/thread-pool.h"
  29. #include "block/graph-lock.h"
  30. #include "qemu/main-loop.h"
  31. #include "qemu/atomic.h"
  32. #include "qemu/rcu_queue.h"
  33. #include "block/raw-aio.h"
  34. #include "qemu/coroutine_int.h"
  35. #include "qemu/coroutine-tls.h"
  36. #include "sysemu/cpu-timers.h"
  37. #include "trace.h"
  38. /***********************************************************/
  39. /* bottom halves (can be seen as timers which expire ASAP) */
  40. /* QEMUBH::flags values */
  41. enum {
  42. /* Already enqueued and waiting for aio_bh_poll() */
  43. BH_PENDING = (1 << 0),
  44. /* Invoke the callback */
  45. BH_SCHEDULED = (1 << 1),
  46. /* Delete without invoking callback */
  47. BH_DELETED = (1 << 2),
  48. /* Delete after invoking callback */
  49. BH_ONESHOT = (1 << 3),
  50. /* Schedule periodically when the event loop is idle */
  51. BH_IDLE = (1 << 4),
  52. };
  53. struct QEMUBH {
  54. AioContext *ctx;
  55. const char *name;
  56. QEMUBHFunc *cb;
  57. void *opaque;
  58. QSLIST_ENTRY(QEMUBH) next;
  59. unsigned flags;
  60. MemReentrancyGuard *reentrancy_guard;
  61. };
  62. /* Called concurrently from any thread */
  63. static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
  64. {
  65. AioContext *ctx = bh->ctx;
  66. unsigned old_flags;
  67. /*
  68. * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
  69. * insertion starts after BH_PENDING is set.
  70. */
  71. old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
  72. if (!(old_flags & BH_PENDING)) {
  73. /*
  74. * At this point the bottom half becomes visible to aio_bh_poll().
  75. * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
  76. * aio_bh_poll(), ensuring that:
  77. * 1. any writes needed by the callback are visible from the callback
  78. * after aio_bh_dequeue() returns bh.
  79. * 2. ctx is loaded before the callback has a chance to execute and bh
  80. * could be freed.
  81. */
  82. QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
  83. }
  84. aio_notify(ctx);
  85. /*
  86. * Workaround for record/replay.
  87. * vCPU execution should be suspended when new BH is set.
  88. * This is needed to avoid guest timeouts caused
  89. * by the long cycles of the execution.
  90. */
  91. icount_notify_exit();
  92. }
  93. /* Only called from aio_bh_poll() and aio_ctx_finalize() */
  94. static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
  95. {
  96. QEMUBH *bh = QSLIST_FIRST_RCU(head);
  97. if (!bh) {
  98. return NULL;
  99. }
  100. QSLIST_REMOVE_HEAD(head, next);
  101. /*
  102. * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
  103. * the removal finishes before BH_PENDING is reset.
  104. */
  105. *flags = qatomic_fetch_and(&bh->flags,
  106. ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
  107. return bh;
  108. }
  109. void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
  110. void *opaque, const char *name)
  111. {
  112. QEMUBH *bh;
  113. bh = g_new(QEMUBH, 1);
  114. *bh = (QEMUBH){
  115. .ctx = ctx,
  116. .cb = cb,
  117. .opaque = opaque,
  118. .name = name,
  119. };
  120. aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
  121. }
  122. QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
  123. const char *name, MemReentrancyGuard *reentrancy_guard)
  124. {
  125. QEMUBH *bh;
  126. bh = g_new(QEMUBH, 1);
  127. *bh = (QEMUBH){
  128. .ctx = ctx,
  129. .cb = cb,
  130. .opaque = opaque,
  131. .name = name,
  132. .reentrancy_guard = reentrancy_guard,
  133. };
  134. return bh;
  135. }
  136. void aio_bh_call(QEMUBH *bh)
  137. {
  138. bool last_engaged_in_io = false;
  139. /* Make a copy of the guard-pointer as cb may free the bh */
  140. MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
  141. if (reentrancy_guard) {
  142. last_engaged_in_io = reentrancy_guard->engaged_in_io;
  143. if (reentrancy_guard->engaged_in_io) {
  144. trace_reentrant_aio(bh->ctx, bh->name);
  145. }
  146. reentrancy_guard->engaged_in_io = true;
  147. }
  148. bh->cb(bh->opaque);
  149. if (reentrancy_guard) {
  150. reentrancy_guard->engaged_in_io = last_engaged_in_io;
  151. }
  152. }
  153. /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
  154. int aio_bh_poll(AioContext *ctx)
  155. {
  156. BHListSlice slice;
  157. BHListSlice *s;
  158. int ret = 0;
  159. /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
  160. QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
  161. /*
  162. * GCC13 [-Werror=dangling-pointer=] complains that the local variable
  163. * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
  164. * list is emptied before this function returns.
  165. */
  166. #if !defined(__clang__)
  167. #pragma GCC diagnostic push
  168. #pragma GCC diagnostic ignored "-Wpragmas"
  169. #pragma GCC diagnostic ignored "-Wdangling-pointer="
  170. #endif
  171. QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
  172. #if !defined(__clang__)
  173. #pragma GCC diagnostic pop
  174. #endif
  175. while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
  176. QEMUBH *bh;
  177. unsigned flags;
  178. bh = aio_bh_dequeue(&s->bh_list, &flags);
  179. if (!bh) {
  180. QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
  181. continue;
  182. }
  183. if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  184. /* Idle BHs don't count as progress */
  185. if (!(flags & BH_IDLE)) {
  186. ret = 1;
  187. }
  188. aio_bh_call(bh);
  189. }
  190. if (flags & (BH_DELETED | BH_ONESHOT)) {
  191. g_free(bh);
  192. }
  193. }
  194. return ret;
  195. }
  196. void qemu_bh_schedule_idle(QEMUBH *bh)
  197. {
  198. aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
  199. }
  200. void qemu_bh_schedule(QEMUBH *bh)
  201. {
  202. aio_bh_enqueue(bh, BH_SCHEDULED);
  203. }
  204. /* This func is async.
  205. */
  206. void qemu_bh_cancel(QEMUBH *bh)
  207. {
  208. qatomic_and(&bh->flags, ~BH_SCHEDULED);
  209. }
  210. /* This func is async.The bottom half will do the delete action at the finial
  211. * end.
  212. */
  213. void qemu_bh_delete(QEMUBH *bh)
  214. {
  215. aio_bh_enqueue(bh, BH_DELETED);
  216. }
  217. static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
  218. {
  219. QEMUBH *bh;
  220. QSLIST_FOREACH_RCU(bh, head, next) {
  221. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  222. if (bh->flags & BH_IDLE) {
  223. /* idle bottom halves will be polled at least
  224. * every 10ms */
  225. timeout = 10000000;
  226. } else {
  227. /* non-idle bottom halves will be executed
  228. * immediately */
  229. return 0;
  230. }
  231. }
  232. }
  233. return timeout;
  234. }
  235. int64_t
  236. aio_compute_timeout(AioContext *ctx)
  237. {
  238. BHListSlice *s;
  239. int64_t deadline;
  240. int timeout = -1;
  241. timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
  242. if (timeout == 0) {
  243. return 0;
  244. }
  245. QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
  246. timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
  247. if (timeout == 0) {
  248. return 0;
  249. }
  250. }
  251. deadline = timerlistgroup_deadline_ns(&ctx->tlg);
  252. if (deadline == 0) {
  253. return 0;
  254. } else {
  255. return qemu_soonest_timeout(timeout, deadline);
  256. }
  257. }
  258. static gboolean
  259. aio_ctx_prepare(GSource *source, gint *timeout)
  260. {
  261. AioContext *ctx = (AioContext *) source;
  262. qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) | 1);
  263. /*
  264. * Write ctx->notify_me before computing the timeout
  265. * (reading bottom half flags, etc.). Pairs with
  266. * smp_mb in aio_notify().
  267. */
  268. smp_mb();
  269. /* We assume there is no timeout already supplied */
  270. *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
  271. if (aio_prepare(ctx)) {
  272. *timeout = 0;
  273. }
  274. return *timeout == 0;
  275. }
  276. static gboolean
  277. aio_ctx_check(GSource *source)
  278. {
  279. AioContext *ctx = (AioContext *) source;
  280. QEMUBH *bh;
  281. BHListSlice *s;
  282. /* Finish computing the timeout before clearing the flag. */
  283. qatomic_store_release(&ctx->notify_me, qatomic_read(&ctx->notify_me) & ~1);
  284. aio_notify_accept(ctx);
  285. QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
  286. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  287. return true;
  288. }
  289. }
  290. QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
  291. QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
  292. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  293. return true;
  294. }
  295. }
  296. }
  297. return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
  298. }
  299. static gboolean
  300. aio_ctx_dispatch(GSource *source,
  301. GSourceFunc callback,
  302. gpointer user_data)
  303. {
  304. AioContext *ctx = (AioContext *) source;
  305. assert(callback == NULL);
  306. aio_dispatch(ctx);
  307. return true;
  308. }
  309. static void
  310. aio_ctx_finalize(GSource *source)
  311. {
  312. AioContext *ctx = (AioContext *) source;
  313. QEMUBH *bh;
  314. unsigned flags;
  315. thread_pool_free(ctx->thread_pool);
  316. #ifdef CONFIG_LINUX_AIO
  317. if (ctx->linux_aio) {
  318. laio_detach_aio_context(ctx->linux_aio, ctx);
  319. laio_cleanup(ctx->linux_aio);
  320. ctx->linux_aio = NULL;
  321. }
  322. #endif
  323. #ifdef CONFIG_LINUX_IO_URING
  324. if (ctx->linux_io_uring) {
  325. luring_detach_aio_context(ctx->linux_io_uring, ctx);
  326. luring_cleanup(ctx->linux_io_uring);
  327. ctx->linux_io_uring = NULL;
  328. }
  329. #endif
  330. assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
  331. qemu_bh_delete(ctx->co_schedule_bh);
  332. /* There must be no aio_bh_poll() calls going on */
  333. assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
  334. while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
  335. /*
  336. * qemu_bh_delete() must have been called on BHs in this AioContext. In
  337. * many cases memory leaks, hangs, or inconsistent state occur when a
  338. * BH is leaked because something still expects it to run.
  339. *
  340. * If you hit this, fix the lifecycle of the BH so that
  341. * qemu_bh_delete() and any associated cleanup is called before the
  342. * AioContext is finalized.
  343. */
  344. if (unlikely(!(flags & BH_DELETED))) {
  345. fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
  346. __func__, bh->name);
  347. abort();
  348. }
  349. g_free(bh);
  350. }
  351. aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
  352. event_notifier_cleanup(&ctx->notifier);
  353. qemu_rec_mutex_destroy(&ctx->lock);
  354. qemu_lockcnt_destroy(&ctx->list_lock);
  355. timerlistgroup_deinit(&ctx->tlg);
  356. unregister_aiocontext(ctx);
  357. aio_context_destroy(ctx);
  358. }
  359. static GSourceFuncs aio_source_funcs = {
  360. aio_ctx_prepare,
  361. aio_ctx_check,
  362. aio_ctx_dispatch,
  363. aio_ctx_finalize
  364. };
  365. GSource *aio_get_g_source(AioContext *ctx)
  366. {
  367. aio_context_use_g_source(ctx);
  368. g_source_ref(&ctx->source);
  369. return &ctx->source;
  370. }
  371. ThreadPool *aio_get_thread_pool(AioContext *ctx)
  372. {
  373. if (!ctx->thread_pool) {
  374. ctx->thread_pool = thread_pool_new(ctx);
  375. }
  376. return ctx->thread_pool;
  377. }
  378. #ifdef CONFIG_LINUX_AIO
  379. LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
  380. {
  381. if (!ctx->linux_aio) {
  382. ctx->linux_aio = laio_init(errp);
  383. if (ctx->linux_aio) {
  384. laio_attach_aio_context(ctx->linux_aio, ctx);
  385. }
  386. }
  387. return ctx->linux_aio;
  388. }
  389. LinuxAioState *aio_get_linux_aio(AioContext *ctx)
  390. {
  391. assert(ctx->linux_aio);
  392. return ctx->linux_aio;
  393. }
  394. #endif
  395. #ifdef CONFIG_LINUX_IO_URING
  396. LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
  397. {
  398. if (ctx->linux_io_uring) {
  399. return ctx->linux_io_uring;
  400. }
  401. ctx->linux_io_uring = luring_init(errp);
  402. if (!ctx->linux_io_uring) {
  403. return NULL;
  404. }
  405. luring_attach_aio_context(ctx->linux_io_uring, ctx);
  406. return ctx->linux_io_uring;
  407. }
  408. LuringState *aio_get_linux_io_uring(AioContext *ctx)
  409. {
  410. assert(ctx->linux_io_uring);
  411. return ctx->linux_io_uring;
  412. }
  413. #endif
  414. void aio_notify(AioContext *ctx)
  415. {
  416. /*
  417. * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
  418. * smp_mb() in aio_notify_accept().
  419. */
  420. smp_wmb();
  421. qatomic_set(&ctx->notified, true);
  422. /*
  423. * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
  424. * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
  425. */
  426. smp_mb();
  427. if (qatomic_read(&ctx->notify_me)) {
  428. event_notifier_set(&ctx->notifier);
  429. }
  430. }
  431. void aio_notify_accept(AioContext *ctx)
  432. {
  433. qatomic_set(&ctx->notified, false);
  434. /*
  435. * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
  436. * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
  437. * with smp_wmb() in aio_notify.
  438. */
  439. smp_mb();
  440. }
  441. static void aio_timerlist_notify(void *opaque, QEMUClockType type)
  442. {
  443. aio_notify(opaque);
  444. }
  445. static void aio_context_notifier_cb(EventNotifier *e)
  446. {
  447. AioContext *ctx = container_of(e, AioContext, notifier);
  448. event_notifier_test_and_clear(&ctx->notifier);
  449. }
  450. /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
  451. static bool aio_context_notifier_poll(void *opaque)
  452. {
  453. EventNotifier *e = opaque;
  454. AioContext *ctx = container_of(e, AioContext, notifier);
  455. /*
  456. * No need for load-acquire because we just want to kick the
  457. * event loop. aio_notify_accept() takes care of synchronizing
  458. * the event loop with the producers.
  459. */
  460. return qatomic_read(&ctx->notified);
  461. }
  462. static void aio_context_notifier_poll_ready(EventNotifier *e)
  463. {
  464. /* Do nothing, we just wanted to kick the event loop */
  465. }
  466. static void co_schedule_bh_cb(void *opaque)
  467. {
  468. AioContext *ctx = opaque;
  469. QSLIST_HEAD(, Coroutine) straight, reversed;
  470. QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
  471. QSLIST_INIT(&straight);
  472. while (!QSLIST_EMPTY(&reversed)) {
  473. Coroutine *co = QSLIST_FIRST(&reversed);
  474. QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
  475. QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
  476. }
  477. while (!QSLIST_EMPTY(&straight)) {
  478. Coroutine *co = QSLIST_FIRST(&straight);
  479. QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
  480. trace_aio_co_schedule_bh_cb(ctx, co);
  481. /* Protected by write barrier in qemu_aio_coroutine_enter */
  482. qatomic_set(&co->scheduled, NULL);
  483. qemu_aio_coroutine_enter(ctx, co);
  484. }
  485. }
  486. AioContext *aio_context_new(Error **errp)
  487. {
  488. int ret;
  489. AioContext *ctx;
  490. ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
  491. QSLIST_INIT(&ctx->bh_list);
  492. QSIMPLEQ_INIT(&ctx->bh_slice_list);
  493. aio_context_setup(ctx);
  494. ret = event_notifier_init(&ctx->notifier, false);
  495. if (ret < 0) {
  496. error_setg_errno(errp, -ret, "Failed to initialize event notifier");
  497. goto fail;
  498. }
  499. g_source_set_can_recurse(&ctx->source, true);
  500. qemu_lockcnt_init(&ctx->list_lock);
  501. ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
  502. QSLIST_INIT(&ctx->scheduled_coroutines);
  503. aio_set_event_notifier(ctx, &ctx->notifier,
  504. aio_context_notifier_cb,
  505. aio_context_notifier_poll,
  506. aio_context_notifier_poll_ready);
  507. #ifdef CONFIG_LINUX_AIO
  508. ctx->linux_aio = NULL;
  509. #endif
  510. #ifdef CONFIG_LINUX_IO_URING
  511. ctx->linux_io_uring = NULL;
  512. #endif
  513. ctx->thread_pool = NULL;
  514. qemu_rec_mutex_init(&ctx->lock);
  515. timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
  516. ctx->poll_ns = 0;
  517. ctx->poll_max_ns = 0;
  518. ctx->poll_grow = 0;
  519. ctx->poll_shrink = 0;
  520. ctx->aio_max_batch = 0;
  521. ctx->thread_pool_min = 0;
  522. ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
  523. register_aiocontext(ctx);
  524. return ctx;
  525. fail:
  526. g_source_destroy(&ctx->source);
  527. return NULL;
  528. }
  529. void aio_co_schedule(AioContext *ctx, Coroutine *co)
  530. {
  531. trace_aio_co_schedule(ctx, co);
  532. const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
  533. __func__);
  534. if (scheduled) {
  535. fprintf(stderr,
  536. "%s: Co-routine was already scheduled in '%s'\n",
  537. __func__, scheduled);
  538. abort();
  539. }
  540. /* The coroutine might run and release the last ctx reference before we
  541. * invoke qemu_bh_schedule(). Take a reference to keep ctx alive until
  542. * we're done.
  543. */
  544. aio_context_ref(ctx);
  545. QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
  546. co, co_scheduled_next);
  547. qemu_bh_schedule(ctx->co_schedule_bh);
  548. aio_context_unref(ctx);
  549. }
  550. typedef struct AioCoRescheduleSelf {
  551. Coroutine *co;
  552. AioContext *new_ctx;
  553. } AioCoRescheduleSelf;
  554. static void aio_co_reschedule_self_bh(void *opaque)
  555. {
  556. AioCoRescheduleSelf *data = opaque;
  557. aio_co_schedule(data->new_ctx, data->co);
  558. }
  559. void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
  560. {
  561. AioContext *old_ctx = qemu_get_current_aio_context();
  562. if (old_ctx != new_ctx) {
  563. AioCoRescheduleSelf data = {
  564. .co = qemu_coroutine_self(),
  565. .new_ctx = new_ctx,
  566. };
  567. /*
  568. * We can't directly schedule the coroutine in the target context
  569. * because this would be racy: The other thread could try to enter the
  570. * coroutine before it has yielded in this one.
  571. */
  572. aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
  573. qemu_coroutine_yield();
  574. }
  575. }
  576. void aio_co_wake(Coroutine *co)
  577. {
  578. AioContext *ctx;
  579. /* Read coroutine before co->ctx. Matches smp_wmb in
  580. * qemu_coroutine_enter.
  581. */
  582. smp_read_barrier_depends();
  583. ctx = qatomic_read(&co->ctx);
  584. aio_co_enter(ctx, co);
  585. }
  586. void aio_co_enter(AioContext *ctx, Coroutine *co)
  587. {
  588. if (ctx != qemu_get_current_aio_context()) {
  589. aio_co_schedule(ctx, co);
  590. return;
  591. }
  592. if (qemu_in_coroutine()) {
  593. Coroutine *self = qemu_coroutine_self();
  594. assert(self != co);
  595. QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
  596. } else {
  597. qemu_aio_coroutine_enter(ctx, co);
  598. }
  599. }
  600. void aio_context_ref(AioContext *ctx)
  601. {
  602. g_source_ref(&ctx->source);
  603. }
  604. void aio_context_unref(AioContext *ctx)
  605. {
  606. g_source_unref(&ctx->source);
  607. }
  608. void aio_context_acquire(AioContext *ctx)
  609. {
  610. /* TODO remove this function */
  611. }
  612. void aio_context_release(AioContext *ctx)
  613. {
  614. /* TODO remove this function */
  615. }
  616. QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
  617. AioContext *qemu_get_current_aio_context(void)
  618. {
  619. AioContext *ctx = get_my_aiocontext();
  620. if (ctx) {
  621. return ctx;
  622. }
  623. if (qemu_mutex_iothread_locked()) {
  624. /* Possibly in a vCPU thread. */
  625. return qemu_get_aio_context();
  626. }
  627. return NULL;
  628. }
  629. void qemu_set_current_aio_context(AioContext *ctx)
  630. {
  631. assert(!get_my_aiocontext());
  632. set_my_aiocontext(ctx);
  633. }
  634. void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
  635. int64_t max, Error **errp)
  636. {
  637. if (min > max || !max || min > INT_MAX || max > INT_MAX) {
  638. error_setg(errp, "bad thread-pool-min/thread-pool-max values");
  639. return;
  640. }
  641. ctx->thread_pool_min = min;
  642. ctx->thread_pool_max = max;
  643. if (ctx->thread_pool) {
  644. thread_pool_update_params(ctx->thread_pool, ctx);
  645. }
  646. }