async.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /*
  2. * Data plane event loop
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2009-2017 QEMU contributors
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "qapi/error.h"
  27. #include "block/aio.h"
  28. #include "block/thread-pool.h"
  29. #include "block/graph-lock.h"
  30. #include "qemu/main-loop.h"
  31. #include "qemu/atomic.h"
  32. #include "qemu/lockcnt.h"
  33. #include "qemu/rcu_queue.h"
  34. #include "block/raw-aio.h"
  35. #include "qemu/coroutine_int.h"
  36. #include "qemu/coroutine-tls.h"
  37. #include "sysemu/cpu-timers.h"
  38. #include "trace.h"
  39. /***********************************************************/
  40. /* bottom halves (can be seen as timers which expire ASAP) */
  /*
   * Bit values stored in QEMUBH::flags.  Every constant occupies its own bit,
   * so several of them can be combined in a single word.
   */
  enum {
      /* Already enqueued and waiting for aio_bh_poll() */
      BH_PENDING   = 0x01,

      /* Invoke the callback */
      BH_SCHEDULED = 0x02,

      /* Delete without invoking callback */
      BH_DELETED   = 0x04,

      /* Delete after invoking callback */
      BH_ONESHOT   = 0x08,

      /* Schedule periodically when the event loop is idle */
      BH_IDLE      = 0x10,
  };
  54. struct QEMUBH {
  55. AioContext *ctx;
  56. const char *name;
  57. QEMUBHFunc *cb;
  58. void *opaque;
  59. QSLIST_ENTRY(QEMUBH) next;
  60. unsigned flags;
  61. MemReentrancyGuard *reentrancy_guard;
  62. };
  63. /* Called concurrently from any thread */
  64. static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
  65. {
  66. AioContext *ctx = bh->ctx;
  67. unsigned old_flags;
  68. /*
  69. * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
  70. * insertion starts after BH_PENDING is set.
  71. */
  72. old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
  73. if (!(old_flags & BH_PENDING)) {
  74. /*
  75. * At this point the bottom half becomes visible to aio_bh_poll().
  76. * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
  77. * aio_bh_poll(), ensuring that:
  78. * 1. any writes needed by the callback are visible from the callback
  79. * after aio_bh_dequeue() returns bh.
  80. * 2. ctx is loaded before the callback has a chance to execute and bh
  81. * could be freed.
  82. */
  83. QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
  84. }
  85. aio_notify(ctx);
  86. if (unlikely(icount_enabled())) {
  87. /*
  88. * Workaround for record/replay.
  89. * vCPU execution should be suspended when new BH is set.
  90. * This is needed to avoid guest timeouts caused
  91. * by the long cycles of the execution.
  92. */
  93. icount_notify_exit();
  94. }
  95. }
  96. /* Only called from aio_bh_poll() and aio_ctx_finalize() */
  97. static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
  98. {
  99. QEMUBH *bh = QSLIST_FIRST_RCU(head);
  100. if (!bh) {
  101. return NULL;
  102. }
  103. QSLIST_REMOVE_HEAD(head, next);
  104. /*
  105. * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
  106. * the removal finishes before BH_PENDING is reset.
  107. */
  108. *flags = qatomic_fetch_and(&bh->flags,
  109. ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
  110. return bh;
  111. }
  112. void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
  113. void *opaque, const char *name)
  114. {
  115. QEMUBH *bh;
  116. bh = g_new(QEMUBH, 1);
  117. *bh = (QEMUBH){
  118. .ctx = ctx,
  119. .cb = cb,
  120. .opaque = opaque,
  121. .name = name,
  122. };
  123. aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
  124. }
  125. QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
  126. const char *name, MemReentrancyGuard *reentrancy_guard)
  127. {
  128. QEMUBH *bh;
  129. bh = g_new(QEMUBH, 1);
  130. *bh = (QEMUBH){
  131. .ctx = ctx,
  132. .cb = cb,
  133. .opaque = opaque,
  134. .name = name,
  135. .reentrancy_guard = reentrancy_guard,
  136. };
  137. return bh;
  138. }
  139. void aio_bh_call(QEMUBH *bh)
  140. {
  141. bool last_engaged_in_io = false;
  142. /* Make a copy of the guard-pointer as cb may free the bh */
  143. MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
  144. if (reentrancy_guard) {
  145. last_engaged_in_io = reentrancy_guard->engaged_in_io;
  146. if (reentrancy_guard->engaged_in_io) {
  147. trace_reentrant_aio(bh->ctx, bh->name);
  148. }
  149. reentrancy_guard->engaged_in_io = true;
  150. }
  151. bh->cb(bh->opaque);
  152. if (reentrancy_guard) {
  153. reentrancy_guard->engaged_in_io = last_engaged_in_io;
  154. }
  155. }
  156. /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
  157. int aio_bh_poll(AioContext *ctx)
  158. {
  159. BHListSlice slice;
  160. BHListSlice *s;
  161. int ret = 0;
  162. /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
  163. QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
  164. /*
  165. * GCC13 [-Werror=dangling-pointer=] complains that the local variable
  166. * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
  167. * list is emptied before this function returns.
  168. */
  169. #if !defined(__clang__)
  170. #pragma GCC diagnostic push
  171. #pragma GCC diagnostic ignored "-Wpragmas"
  172. #pragma GCC diagnostic ignored "-Wdangling-pointer="
  173. #endif
  174. QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
  175. #if !defined(__clang__)
  176. #pragma GCC diagnostic pop
  177. #endif
  178. while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
  179. QEMUBH *bh;
  180. unsigned flags;
  181. bh = aio_bh_dequeue(&s->bh_list, &flags);
  182. if (!bh) {
  183. QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
  184. continue;
  185. }
  186. if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  187. /* Idle BHs don't count as progress */
  188. if (!(flags & BH_IDLE)) {
  189. ret = 1;
  190. }
  191. aio_bh_call(bh);
  192. }
  193. if (flags & (BH_DELETED | BH_ONESHOT)) {
  194. g_free(bh);
  195. }
  196. }
  197. return ret;
  198. }
  199. void qemu_bh_schedule_idle(QEMUBH *bh)
  200. {
  201. aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
  202. }
  203. void qemu_bh_schedule(QEMUBH *bh)
  204. {
  205. aio_bh_enqueue(bh, BH_SCHEDULED);
  206. }
  207. /* This func is async.
  208. */
  209. void qemu_bh_cancel(QEMUBH *bh)
  210. {
  211. qatomic_and(&bh->flags, ~BH_SCHEDULED);
  212. }
  213. /* This func is async.The bottom half will do the delete action at the finial
  214. * end.
  215. */
  216. void qemu_bh_delete(QEMUBH *bh)
  217. {
  218. aio_bh_enqueue(bh, BH_DELETED);
  219. }
  220. static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
  221. {
  222. QEMUBH *bh;
  223. QSLIST_FOREACH_RCU(bh, head, next) {
  224. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  225. if (bh->flags & BH_IDLE) {
  226. /* idle bottom halves will be polled at least
  227. * every 10ms */
  228. timeout = 10000000;
  229. } else {
  230. /* non-idle bottom halves will be executed
  231. * immediately */
  232. return 0;
  233. }
  234. }
  235. }
  236. return timeout;
  237. }
  238. int64_t
  239. aio_compute_timeout(AioContext *ctx)
  240. {
  241. BHListSlice *s;
  242. int64_t deadline;
  243. int timeout = -1;
  244. timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
  245. if (timeout == 0) {
  246. return 0;
  247. }
  248. QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
  249. timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
  250. if (timeout == 0) {
  251. return 0;
  252. }
  253. }
  254. deadline = timerlistgroup_deadline_ns(&ctx->tlg);
  255. if (deadline == 0) {
  256. return 0;
  257. } else {
  258. return qemu_soonest_timeout(timeout, deadline);
  259. }
  260. }
  261. static gboolean
  262. aio_ctx_prepare(GSource *source, gint *timeout)
  263. {
  264. AioContext *ctx = (AioContext *) source;
  265. qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) | 1);
  266. /*
  267. * Write ctx->notify_me before computing the timeout
  268. * (reading bottom half flags, etc.). Pairs with
  269. * smp_mb in aio_notify().
  270. */
  271. smp_mb();
  272. /* We assume there is no timeout already supplied */
  273. *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
  274. if (aio_prepare(ctx)) {
  275. *timeout = 0;
  276. }
  277. return *timeout == 0;
  278. }
  279. static gboolean
  280. aio_ctx_check(GSource *source)
  281. {
  282. AioContext *ctx = (AioContext *) source;
  283. QEMUBH *bh;
  284. BHListSlice *s;
  285. /* Finish computing the timeout before clearing the flag. */
  286. qatomic_store_release(&ctx->notify_me, qatomic_read(&ctx->notify_me) & ~1);
  287. aio_notify_accept(ctx);
  288. QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
  289. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  290. return true;
  291. }
  292. }
  293. QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
  294. QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
  295. if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
  296. return true;
  297. }
  298. }
  299. }
  300. return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
  301. }
  302. static gboolean
  303. aio_ctx_dispatch(GSource *source,
  304. GSourceFunc callback,
  305. gpointer user_data)
  306. {
  307. AioContext *ctx = (AioContext *) source;
  308. assert(callback == NULL);
  309. aio_dispatch(ctx);
  310. return true;
  311. }
  312. static void
  313. aio_ctx_finalize(GSource *source)
  314. {
  315. AioContext *ctx = (AioContext *) source;
  316. QEMUBH *bh;
  317. unsigned flags;
  318. thread_pool_free(ctx->thread_pool);
  319. #ifdef CONFIG_LINUX_AIO
  320. if (ctx->linux_aio) {
  321. laio_detach_aio_context(ctx->linux_aio, ctx);
  322. laio_cleanup(ctx->linux_aio);
  323. ctx->linux_aio = NULL;
  324. }
  325. #endif
  326. #ifdef CONFIG_LINUX_IO_URING
  327. if (ctx->linux_io_uring) {
  328. luring_detach_aio_context(ctx->linux_io_uring, ctx);
  329. luring_cleanup(ctx->linux_io_uring);
  330. ctx->linux_io_uring = NULL;
  331. }
  332. #endif
  333. assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
  334. qemu_bh_delete(ctx->co_schedule_bh);
  335. /* There must be no aio_bh_poll() calls going on */
  336. assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
  337. while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
  338. /*
  339. * qemu_bh_delete() must have been called on BHs in this AioContext. In
  340. * many cases memory leaks, hangs, or inconsistent state occur when a
  341. * BH is leaked because something still expects it to run.
  342. *
  343. * If you hit this, fix the lifecycle of the BH so that
  344. * qemu_bh_delete() and any associated cleanup is called before the
  345. * AioContext is finalized.
  346. */
  347. if (unlikely(!(flags & BH_DELETED))) {
  348. fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
  349. __func__, bh->name);
  350. abort();
  351. }
  352. g_free(bh);
  353. }
  354. aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
  355. event_notifier_cleanup(&ctx->notifier);
  356. qemu_rec_mutex_destroy(&ctx->lock);
  357. qemu_lockcnt_destroy(&ctx->list_lock);
  358. timerlistgroup_deinit(&ctx->tlg);
  359. unregister_aiocontext(ctx);
  360. aio_context_destroy(ctx);
  361. }
  362. static GSourceFuncs aio_source_funcs = {
  363. aio_ctx_prepare,
  364. aio_ctx_check,
  365. aio_ctx_dispatch,
  366. aio_ctx_finalize
  367. };
  368. GSource *aio_get_g_source(AioContext *ctx)
  369. {
  370. aio_context_use_g_source(ctx);
  371. g_source_ref(&ctx->source);
  372. return &ctx->source;
  373. }
  374. ThreadPool *aio_get_thread_pool(AioContext *ctx)
  375. {
  376. if (!ctx->thread_pool) {
  377. ctx->thread_pool = thread_pool_new(ctx);
  378. }
  379. return ctx->thread_pool;
  380. }
  381. #ifdef CONFIG_LINUX_AIO
  382. LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp)
  383. {
  384. if (!ctx->linux_aio) {
  385. ctx->linux_aio = laio_init(errp);
  386. if (ctx->linux_aio) {
  387. laio_attach_aio_context(ctx->linux_aio, ctx);
  388. }
  389. }
  390. return ctx->linux_aio;
  391. }
  392. LinuxAioState *aio_get_linux_aio(AioContext *ctx)
  393. {
  394. assert(ctx->linux_aio);
  395. return ctx->linux_aio;
  396. }
  397. #endif
  398. #ifdef CONFIG_LINUX_IO_URING
  399. LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
  400. {
  401. if (ctx->linux_io_uring) {
  402. return ctx->linux_io_uring;
  403. }
  404. ctx->linux_io_uring = luring_init(errp);
  405. if (!ctx->linux_io_uring) {
  406. return NULL;
  407. }
  408. luring_attach_aio_context(ctx->linux_io_uring, ctx);
  409. return ctx->linux_io_uring;
  410. }
  411. LuringState *aio_get_linux_io_uring(AioContext *ctx)
  412. {
  413. assert(ctx->linux_io_uring);
  414. return ctx->linux_io_uring;
  415. }
  416. #endif
  417. void aio_notify(AioContext *ctx)
  418. {
  419. /*
  420. * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
  421. * smp_mb() in aio_notify_accept().
  422. */
  423. smp_wmb();
  424. qatomic_set(&ctx->notified, true);
  425. /*
  426. * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
  427. * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
  428. */
  429. smp_mb();
  430. if (qatomic_read(&ctx->notify_me)) {
  431. event_notifier_set(&ctx->notifier);
  432. }
  433. }
  434. void aio_notify_accept(AioContext *ctx)
  435. {
  436. qatomic_set(&ctx->notified, false);
  437. /*
  438. * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
  439. * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
  440. * with smp_wmb() in aio_notify.
  441. */
  442. smp_mb();
  443. }
  444. static void aio_timerlist_notify(void *opaque, QEMUClockType type)
  445. {
  446. aio_notify(opaque);
  447. }
  448. static void aio_context_notifier_cb(EventNotifier *e)
  449. {
  450. AioContext *ctx = container_of(e, AioContext, notifier);
  451. event_notifier_test_and_clear(&ctx->notifier);
  452. }
  453. /* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
  454. static bool aio_context_notifier_poll(void *opaque)
  455. {
  456. EventNotifier *e = opaque;
  457. AioContext *ctx = container_of(e, AioContext, notifier);
  458. /*
  459. * No need for load-acquire because we just want to kick the
  460. * event loop. aio_notify_accept() takes care of synchronizing
  461. * the event loop with the producers.
  462. */
  463. return qatomic_read(&ctx->notified);
  464. }
  465. static void aio_context_notifier_poll_ready(EventNotifier *e)
  466. {
  467. /* Do nothing, we just wanted to kick the event loop */
  468. }
  469. static void co_schedule_bh_cb(void *opaque)
  470. {
  471. AioContext *ctx = opaque;
  472. QSLIST_HEAD(, Coroutine) straight, reversed;
  473. QSLIST_MOVE_ATOMIC(&reversed, &ctx->scheduled_coroutines);
  474. QSLIST_INIT(&straight);
  475. while (!QSLIST_EMPTY(&reversed)) {
  476. Coroutine *co = QSLIST_FIRST(&reversed);
  477. QSLIST_REMOVE_HEAD(&reversed, co_scheduled_next);
  478. QSLIST_INSERT_HEAD(&straight, co, co_scheduled_next);
  479. }
  480. while (!QSLIST_EMPTY(&straight)) {
  481. Coroutine *co = QSLIST_FIRST(&straight);
  482. QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
  483. trace_aio_co_schedule_bh_cb(ctx, co);
  484. /* Protected by write barrier in qemu_aio_coroutine_enter */
  485. qatomic_set(&co->scheduled, NULL);
  486. qemu_aio_coroutine_enter(ctx, co);
  487. }
  488. }
  489. AioContext *aio_context_new(Error **errp)
  490. {
  491. int ret;
  492. AioContext *ctx;
  493. ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
  494. QSLIST_INIT(&ctx->bh_list);
  495. QSIMPLEQ_INIT(&ctx->bh_slice_list);
  496. aio_context_setup(ctx);
  497. ret = event_notifier_init(&ctx->notifier, false);
  498. if (ret < 0) {
  499. error_setg_errno(errp, -ret, "Failed to initialize event notifier");
  500. goto fail;
  501. }
  502. g_source_set_can_recurse(&ctx->source, true);
  503. qemu_lockcnt_init(&ctx->list_lock);
  504. ctx->co_schedule_bh = aio_bh_new(ctx, co_schedule_bh_cb, ctx);
  505. QSLIST_INIT(&ctx->scheduled_coroutines);
  506. aio_set_event_notifier(ctx, &ctx->notifier,
  507. aio_context_notifier_cb,
  508. aio_context_notifier_poll,
  509. aio_context_notifier_poll_ready);
  510. #ifdef CONFIG_LINUX_AIO
  511. ctx->linux_aio = NULL;
  512. #endif
  513. #ifdef CONFIG_LINUX_IO_URING
  514. ctx->linux_io_uring = NULL;
  515. #endif
  516. ctx->thread_pool = NULL;
  517. qemu_rec_mutex_init(&ctx->lock);
  518. timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
  519. ctx->poll_ns = 0;
  520. ctx->poll_max_ns = 0;
  521. ctx->poll_grow = 0;
  522. ctx->poll_shrink = 0;
  523. ctx->aio_max_batch = 0;
  524. ctx->thread_pool_min = 0;
  525. ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
  526. register_aiocontext(ctx);
  527. return ctx;
  528. fail:
  529. g_source_destroy(&ctx->source);
  530. return NULL;
  531. }
  532. void aio_co_schedule(AioContext *ctx, Coroutine *co)
  533. {
  534. trace_aio_co_schedule(ctx, co);
  535. const char *scheduled = qatomic_cmpxchg(&co->scheduled, NULL,
  536. __func__);
  537. if (scheduled) {
  538. fprintf(stderr,
  539. "%s: Co-routine was already scheduled in '%s'\n",
  540. __func__, scheduled);
  541. abort();
  542. }
  543. /* The coroutine might run and release the last ctx reference before we
  544. * invoke qemu_bh_schedule(). Take a reference to keep ctx alive until
  545. * we're done.
  546. */
  547. aio_context_ref(ctx);
  548. QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
  549. co, co_scheduled_next);
  550. qemu_bh_schedule(ctx->co_schedule_bh);
  551. aio_context_unref(ctx);
  552. }
  553. typedef struct AioCoRescheduleSelf {
  554. Coroutine *co;
  555. AioContext *new_ctx;
  556. } AioCoRescheduleSelf;
  557. static void aio_co_reschedule_self_bh(void *opaque)
  558. {
  559. AioCoRescheduleSelf *data = opaque;
  560. aio_co_schedule(data->new_ctx, data->co);
  561. }
  562. void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
  563. {
  564. AioContext *old_ctx = qemu_get_current_aio_context();
  565. if (old_ctx != new_ctx) {
  566. AioCoRescheduleSelf data = {
  567. .co = qemu_coroutine_self(),
  568. .new_ctx = new_ctx,
  569. };
  570. /*
  571. * We can't directly schedule the coroutine in the target context
  572. * because this would be racy: The other thread could try to enter the
  573. * coroutine before it has yielded in this one.
  574. */
  575. aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, &data);
  576. qemu_coroutine_yield();
  577. }
  578. }
  579. void aio_co_wake(Coroutine *co)
  580. {
  581. AioContext *ctx;
  582. /* Read coroutine before co->ctx. Matches smp_wmb in
  583. * qemu_coroutine_enter.
  584. */
  585. smp_read_barrier_depends();
  586. ctx = qatomic_read(&co->ctx);
  587. aio_co_enter(ctx, co);
  588. }
  589. void aio_co_enter(AioContext *ctx, Coroutine *co)
  590. {
  591. if (ctx != qemu_get_current_aio_context()) {
  592. aio_co_schedule(ctx, co);
  593. return;
  594. }
  595. if (qemu_in_coroutine()) {
  596. Coroutine *self = qemu_coroutine_self();
  597. assert(self != co);
  598. QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
  599. } else {
  600. qemu_aio_coroutine_enter(ctx, co);
  601. }
  602. }
  603. void aio_context_ref(AioContext *ctx)
  604. {
  605. g_source_ref(&ctx->source);
  606. }
  607. void aio_context_unref(AioContext *ctx)
  608. {
  609. g_source_unref(&ctx->source);
  610. }
  611. QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
  612. AioContext *qemu_get_current_aio_context(void)
  613. {
  614. AioContext *ctx = get_my_aiocontext();
  615. if (ctx) {
  616. return ctx;
  617. }
  618. if (bql_locked()) {
  619. /* Possibly in a vCPU thread. */
  620. return qemu_get_aio_context();
  621. }
  622. return NULL;
  623. }
  624. void qemu_set_current_aio_context(AioContext *ctx)
  625. {
  626. assert(!get_my_aiocontext());
  627. set_my_aiocontext(ctx);
  628. }
  629. void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
  630. int64_t max, Error **errp)
  631. {
  632. if (min > max || max <= 0 || min < 0 || min > INT_MAX || max > INT_MAX) {
  633. error_setg(errp, "bad thread-pool-min/thread-pool-max values");
  634. return;
  635. }
  636. ctx->thread_pool_min = min;
  637. ctx->thread_pool_max = max;
  638. if (ctx->thread_pool) {
  639. thread_pool_update_params(ctx->thread_pool, ctx);
  640. }
  641. }