/*
 * Background jobs (long-running operations)
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012, 2018 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/job.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
#include "block/aio-wait.h"
#include "trace/trace-root.h"
#include "qapi/qapi-events-job.h"
/*
 * The job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor. The monitor is
 * peculiar in that it accesses the job list with job_get, and
 * therefore needs consistency across job_get and the actual operation
 * (e.g. job_user_cancel). To achieve this consistency, the caller
 * calls job_lock/job_unlock itself around the whole operation.
 *
 * The second includes functions used by the job drivers and sometimes
 * by the core block layer. These delegate the locking to the callee instead.
 */
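
/*
 * Illustrative sketch (not part of the original file): the monitor-style
 * pattern described above, holding job_mutex across both the lookup and
 * the operation. The function name and its caller are hypothetical;
 * JOB_LOCK_GUARD() is the guard-macro equivalent of a
 * job_lock()/job_unlock() pair.
 */
#if 0
void example_monitor_cancel(const char *id, bool force, Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD();  /* covers job_get_locked() and the verb together */
    job = job_get_locked(id);
    if (!job) {
        error_setg(errp, "Job '%s' not found", id);
        return;
    }
    job_user_cancel_locked(job, force, errp);
}
#endif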
/*
 * job_mutex protects the jobs list, but also makes the
 * struct job fields thread-safe.
 */
QemuMutex job_mutex;

/* Protected by job_mutex */
static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs);

/* Job State Transition Table */
bool JobSTT[JOB_STATUS__MAX][JOB_STATUS__MAX] = {
                                    /* U, C, R, P, Y, S, W, D, X, E, N */
    /* U: */ [JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    /* C: */ [JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1},
    /* R: */ [JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0},
    /* P: */ [JOB_STATUS_PAUSED]    = {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
    /* Y: */ [JOB_STATUS_READY]     = {0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0},
    /* S: */ [JOB_STATUS_STANDBY]   = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
    /* W: */ [JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
    /* D: */ [JOB_STATUS_PENDING]   = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* X: */ [JOB_STATUS_ABORTING]  = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* E: */ [JOB_STATUS_CONCLUDED] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
    /* N: */ [JOB_STATUS_NULL]      = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
};

bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
                          /* U, C, R, P, Y, S, W, D, X, E, N */
    [JOB_VERB_CANCEL]    = {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0},
    [JOB_VERB_PAUSE]     = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_RESUME]    = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_COMPLETE]  = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_FINALIZE]  = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
    [JOB_VERB_DISMISS]   = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
};
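
/*
 * Both tables are indexed [row][column]: JobSTT[from][to] is nonzero when
 * the transition from state @from to state @to is allowed, and
 * JobVerbTable[verb][state] is nonzero when @verb may be applied to a job
 * in @state. For example, JobSTT[JOB_STATUS_RUNNING][JOB_STATUS_PAUSED]
 * is 1, so a running job may be paused.
 */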
/* Transactional group of jobs */
struct JobTxn {
    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, Job) jobs;

    /* Reference count */
    int refcnt;
};

void job_lock(void)
{
    qemu_mutex_lock(&job_mutex);
}

void job_unlock(void)
{
    qemu_mutex_unlock(&job_mutex);
}

static void __attribute__((__constructor__)) job_init(void)
{
    qemu_mutex_init(&job_mutex);
}

JobTxn *job_txn_new(void)
{
    JobTxn *txn = g_new0(JobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}

/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
{
    txn->refcnt++;
}

void job_txn_unref_locked(JobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}

void job_txn_unref(JobTxn *txn)
{
    JOB_LOCK_GUARD();
    job_txn_unref_locked(txn);
}

/**
 * @txn: The transaction (may be NULL)
 * @job: Job to add to the transaction
 *
 * Add @job to the transaction. The @job must not already be in a transaction.
 * The caller must call either job_txn_unref() or job_completed() to release
 * the reference that is automatically grabbed here.
 *
 * If @txn is NULL, the function does nothing.
 *
 * Called with job_mutex held.
 */
static void job_txn_add_job_locked(JobTxn *txn, Job *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;

    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    job_txn_ref_locked(txn);
}

/* Called with job_mutex held. */
static void job_txn_del_job_locked(Job *job)
{
    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        job_txn_unref_locked(job->txn);
        job->txn = NULL;
    }
}
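
/*
 * Illustrative sketch (not part of the original file): grouping two jobs
 * into one JobTxn, as a caller such as a QMP transaction command might.
 * job_create() adds each job to @txn and takes its own reference, so the
 * creator drops the initial reference when done. The function name and
 * the job IDs are hypothetical.
 */
#if 0
static void example_start_pair(const JobDriver *drv, AioContext *ctx,
                               Error **errp)
{
    JobTxn *txn = job_txn_new();
    Job *j1 = job_create("example1", drv, txn, ctx, JOB_DEFAULT,
                         NULL, NULL, errp);
    Job *j2 = j1 ? job_create("example2", drv, txn, ctx, JOB_DEFAULT,
                              NULL, NULL, errp) : NULL;

    if (j1 && j2) {
        job_start(j1);
        job_start(j2);
    }
    /* Error cleanup (job_early_fail()) is elided for brevity. */

    /* Drop the creator's reference; the jobs keep @txn alive. */
    job_txn_unref(txn);
}
#endif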
/* Called with job_mutex held, but releases it temporarily. */
static int job_txn_apply_locked(Job *job, int fn(Job *))
{
    Job *other_job, *next;
    JobTxn *txn = job->txn;
    int rc = 0;

    /*
     * Similar to job_completed_txn_abort, we take each job's lock before
     * applying fn, but since we assume that outer_ctx is held by the caller,
     * we need to release it here to avoid holding the lock twice - which would
     * break AIO_WAIT_WHILE from within fn.
     */
    job_ref_locked(job);

    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        rc = fn(other_job);
        if (rc) {
            break;
        }
    }

    job_unref_locked(job);
    return rc;
}

bool job_is_internal(Job *job)
{
    return (job->id == NULL);
}

/* Called with job_mutex held. */
static void job_state_transition_locked(Job *job, JobStatus s1)
{
    JobStatus s0 = job->status;
    assert(s1 >= 0 && s1 < JOB_STATUS__MAX);
    trace_job_state_transition(job, job->ret,
                               JobSTT[s0][s1] ? "allowed" : "disallowed",
                               JobStatus_str(s0), JobStatus_str(s1));
    assert(JobSTT[s0][s1]);
    job->status = s1;

    if (!job_is_internal(job) && s1 != s0) {
        qapi_event_send_job_status_change(job->id, job->status);
    }
}

int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp)
{
    JobStatus s0 = job->status;
    assert(verb >= 0 && verb < JOB_VERB__MAX);
    trace_job_apply_verb(job, JobStatus_str(s0), JobVerb_str(verb),
                         JobVerbTable[verb][s0] ? "allowed" : "prohibited");
    if (JobVerbTable[verb][s0]) {
        return 0;
    }
    error_setg(errp, "Job '%s' in state '%s' cannot accept command verb '%s'",
               job->id, JobStatus_str(s0), JobVerb_str(verb));
    return -EPERM;
}
JobType job_type(const Job *job)
{
    return job->driver->job_type;
}

const char *job_type_str(const Job *job)
{
    return JobType_str(job_type(job));
}

bool job_is_cancelled_locked(Job *job)
{
    /* force_cancel may be true only if cancelled is true, too */
    assert(job->cancelled || !job->force_cancel);
    return job->force_cancel;
}

bool job_is_cancelled(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_cancelled_locked(job);
}

/* Called with job_mutex held. */
static bool job_cancel_requested_locked(Job *job)
{
    return job->cancelled;
}

bool job_cancel_requested(Job *job)
{
    JOB_LOCK_GUARD();
    return job_cancel_requested_locked(job);
}
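
/*
 * Note the asymmetry with job_is_cancelled() above: a soft cancel request
 * (force == false, with a .cancel() callback that does not escalate it)
 * sets job->cancelled but not job->force_cancel, so such a job reports
 * job_cancel_requested() == true while job_is_cancelled() stays false and
 * the job is left to wind down on its own terms.
 */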
bool job_is_ready_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return false;
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

bool job_is_ready(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_ready_locked(job);
}

bool job_is_completed_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return false;
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

static bool job_is_completed(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_completed_locked(job);
}

static bool job_started_locked(Job *job)
{
    return job->co;
}

/* Called with job_mutex held. */
static bool job_should_pause_locked(Job *job)
{
    return job->pause_count > 0;
}
Job *job_next_locked(Job *job)
{
    if (!job) {
        return QLIST_FIRST(&jobs);
    }
    return QLIST_NEXT(job, job_list);
}

Job *job_next(Job *job)
{
    JOB_LOCK_GUARD();
    return job_next_locked(job);
}

Job *job_get_locked(const char *id)
{
    Job *job;

    QLIST_FOREACH(job, &jobs, job_list) {
        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }

    return NULL;
}

void job_set_aio_context(Job *job, AioContext *ctx)
{
    /* protect against read in job_finish_sync_locked and job_start */
    GLOBAL_STATE_CODE();
    /* protect against read in job_do_yield_locked */
    JOB_LOCK_GUARD();
    /* ensure the job is quiescent while the AioContext is changed */
    assert(job->paused || job_is_completed_locked(job));
    job->aio_context = ctx;
}

/* Called with job_mutex *not* held. */
static void job_sleep_timer_cb(void *opaque)
{
    Job *job = opaque;

    job_enter(job);
}
void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
                 AioContext *ctx, int flags, BlockCompletionFunc *cb,
                 void *opaque, Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD();

    if (job_id) {
        if (flags & JOB_INTERNAL) {
            error_setg(errp, "Cannot specify job ID for internal job");
            return NULL;
        }
        if (!id_wellformed(job_id)) {
            error_setg(errp, "Invalid job ID '%s'", job_id);
            return NULL;
        }
        if (job_get_locked(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    } else if (!(flags & JOB_INTERNAL)) {
        error_setg(errp, "An explicit job ID is required");
        return NULL;
    }

    job = g_malloc0(driver->instance_size);
    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->refcnt        = 1;
    job->aio_context   = ctx;
    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;
    job->auto_finalize = !(flags & JOB_MANUAL_FINALIZE);
    job->auto_dismiss  = !(flags & JOB_MANUAL_DISMISS);
    job->cb            = cb;
    job->opaque        = opaque;

    progress_init(&job->progress);

    notifier_list_init(&job->on_finalize_cancelled);
    notifier_list_init(&job->on_finalize_completed);
    notifier_list_init(&job->on_pending);
    notifier_list_init(&job->on_ready);
    notifier_list_init(&job->on_idle);

    job_state_transition_locked(job, JOB_STATUS_CREATED);
    aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
                   QEMU_CLOCK_REALTIME, SCALE_NS,
                   job_sleep_timer_cb, job);

    QLIST_INSERT_HEAD(&jobs, job, job_list);

    /* Single jobs are modeled as single-job transactions for the sake of
     * consolidating the job management logic */
    if (!txn) {
        txn = job_txn_new();
        job_txn_add_job_locked(txn, job);
        job_txn_unref_locked(txn);
    } else {
        job_txn_add_job_locked(txn, job);
    }

    return job;
}
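
/*
 * Illustrative sketch (not part of the original file): defining a minimal
 * JobDriver and creating a job with it. The ExampleJob type, example_run
 * callback, and job ID are hypothetical; reusing JOB_TYPE_BACKUP is an
 * assumption made only so the sketch compiles. A real driver embeds Job
 * as the first field of its own state struct, as shown here.
 */
#if 0
typedef struct ExampleJob {
    Job common;  /* must be the first field */
} ExampleJob;

static int coroutine_fn example_run(Job *job, Error **errp)
{
    return 0;  /* success */
}

static const JobDriver example_job_driver = {
    .instance_size = sizeof(ExampleJob),
    .job_type      = JOB_TYPE_BACKUP,
    .run           = example_run,
};

static Job *example_create(AioContext *ctx, Error **errp)
{
    return job_create("example0", &example_job_driver, NULL, ctx,
                      JOB_DEFAULT, NULL, NULL, errp);
}
#endif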
void job_ref_locked(Job *job)
{
    ++job->refcnt;
}

void job_unref_locked(Job *job)
{
    GLOBAL_STATE_CODE();

    if (--job->refcnt == 0) {
        assert(job->status == JOB_STATUS_NULL);
        assert(!timer_pending(&job->sleep_timer));
        assert(!job->txn);

        if (job->driver->free) {
            AioContext *aio_context = job->aio_context;
            job_unlock();
            /* FIXME: aiocontext lock is required because cb calls blk_unref */
            aio_context_acquire(aio_context);
            job->driver->free(job);
            aio_context_release(aio_context);
            job_lock();
        }

        QLIST_REMOVE(job, job_list);
        progress_destroy(&job->progress);
        error_free(job->err);
        g_free(job->id);
        g_free(job);
    }
}

void job_progress_update(Job *job, uint64_t done)
{
    progress_work_done(&job->progress, done);
}

void job_progress_set_remaining(Job *job, uint64_t remaining)
{
    progress_set_remaining(&job->progress, remaining);
}

void job_progress_increase_remaining(Job *job, uint64_t delta)
{
    progress_increase_remaining(&job->progress, delta);
}
/**
 * To be called when a cancelled job is finalised.
 * Called with job_mutex held.
 */
static void job_event_cancelled_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_cancelled, job);
}

/**
 * To be called when a successfully completed job is finalised.
 * Called with job_mutex held.
 */
static void job_event_completed_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_completed, job);
}

/* Called with job_mutex held. */
static void job_event_pending_locked(Job *job)
{
    notifier_list_notify(&job->on_pending, job);
}

/* Called with job_mutex held. */
static void job_event_ready_locked(Job *job)
{
    notifier_list_notify(&job->on_ready, job);
}

/* Called with job_mutex held. */
static void job_event_idle_locked(Job *job)
{
    notifier_list_notify(&job->on_idle, job);
}
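
/*
 * Illustrative sketch (not part of the original file): observing these
 * events by adding a Notifier to one of the lists above (here on_ready),
 * typically right after job creation. The callback and function names are
 * hypothetical; notifier_list_add() comes from "qemu/notify.h", and
 * notifier_list_notify() passes the job as the data argument.
 */
#if 0
static void example_on_ready(Notifier *n, void *opaque)
{
    Job *job = opaque;  /* the job that just transitioned to READY */
}

static Notifier example_ready_notifier = { .notify = example_on_ready };

static void example_watch_ready(Job *job)
{
    JOB_LOCK_GUARD();  /* the notifier lists are protected by job_mutex */
    notifier_list_add(&job->on_ready, &example_ready_notifier);
}
#endif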
void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
{
    if (!job_started_locked(job)) {
        return;
    }
    if (job->deferred_to_main_loop) {
        return;
    }

    if (job->busy) {
        return;
    }

    if (fn && !fn(job)) {
        return;
    }

    assert(!job->deferred_to_main_loop);
    timer_del(&job->sleep_timer);
    job->busy = true;
    job_unlock();
    aio_co_wake(job->co);
    job_lock();
}

void job_enter(Job *job)
{
    JOB_LOCK_GUARD();
    job_enter_cond_locked(job, NULL);
}
/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
 * Reentering the job coroutine with job_enter() before the timer has expired
 * is allowed and cancels the timer.
 *
 * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be
 * called explicitly.
 *
 * Called with job_mutex held, but releases it temporarily.
 */
static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns)
{
    AioContext *next_aio_context;

    if (ns != -1) {
        timer_mod(&job->sleep_timer, ns);
    }
    job->busy = false;
    job_event_idle_locked(job);
    job_unlock();
    qemu_coroutine_yield();
    job_lock();

    next_aio_context = job->aio_context;
    /*
     * Coroutine has resumed, but in the meanwhile the job AioContext
     * might have changed via bdrv_try_change_aio_context(), so we need to
     * move the coroutine to the new AioContext as well.
     */
    while (qemu_get_current_aio_context() != next_aio_context) {
        job_unlock();
        aio_co_reschedule_self(next_aio_context);
        job_lock();
        next_aio_context = job->aio_context;
    }

    /* Set by job_enter_cond_locked() before re-entering the coroutine. */
    assert(job->busy);
}
/* Called with job_mutex held, but releases it temporarily. */
static void coroutine_fn job_pause_point_locked(Job *job)
{
    assert(job && job_started_locked(job));

    if (!job_should_pause_locked(job)) {
        return;
    }
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (job->driver->pause) {
        job_unlock();
        job->driver->pause(job);
        job_lock();
    }

    if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) {
        JobStatus status = job->status;
        job_state_transition_locked(job, status == JOB_STATUS_READY
                                         ? JOB_STATUS_STANDBY
                                         : JOB_STATUS_PAUSED);
        job->paused = true;
        job_do_yield_locked(job, -1);
        job->paused = false;
        job_state_transition_locked(job, status);
    }

    if (job->driver->resume) {
        job_unlock();
        job->driver->resume(job);
        job_lock();
    }
}

void coroutine_fn job_pause_point(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_point_locked(job);
}
void coroutine_fn job_yield(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, -1);
    }

    job_pause_point_locked(job);
}

void coroutine_fn job_sleep_ns(Job *job, int64_t ns)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
    }

    job_pause_point_locked(job);
}
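
/*
 * Illustrative sketch (not part of the original file): a typical .run()
 * loop built on the primitives above - report progress, throttle with
 * job_sleep_ns(), and let cancellation and pause points do their work.
 * The example_* work functions and the 100 ms delay are hypothetical;
 * job_sleep_ns() doubles as a pause point, as shown in its body above.
 */
#if 0
static int coroutine_fn example_run_loop(Job *job, Error **errp)
{
    job_progress_set_remaining(job, example_total_work());

    while (!job_is_cancelled(job) && example_work_left()) {
        example_do_some_work();
        job_progress_update(job, 1);
        job_sleep_ns(job, 100 * SCALE_MS);
    }
    return 0;
}
#endif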
/* Assumes the job_mutex is held */
static bool job_timer_not_pending_locked(Job *job)
{
    return !timer_pending(&job->sleep_timer);
}

void job_pause_locked(Job *job)
{
    job->pause_count++;
    if (!job->paused) {
        job_enter_cond_locked(job, NULL);
    }
}

void job_pause(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_locked(job);
}

void job_resume_locked(Job *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }

    /* kick only if no timer is pending */
    job_enter_cond_locked(job, job_timer_not_pending_locked);
}

void job_resume(Job *job)
{
    JOB_LOCK_GUARD();
    job_resume_locked(job);
}
void job_user_pause_locked(Job *job, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) {
        return;
    }
    if (job->user_paused) {
        error_setg(errp, "Job is already paused");
        return;
    }
    job->user_paused = true;
    job_pause_locked(job);
}

bool job_user_paused_locked(Job *job)
{
    return job->user_paused;
}

void job_user_resume_locked(Job *job, Error **errp)
{
    assert(job);
    GLOBAL_STATE_CODE();

    if (!job->user_paused || job->pause_count <= 0) {
        error_setg(errp, "Can't resume a job that was not paused");
        return;
    }
    if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) {
        return;
    }
    if (job->driver->user_resume) {
        job_unlock();
        job->driver->user_resume(job);
        job_lock();
    }
    job->user_paused = false;
    job_resume_locked(job);
}
/* Called with job_mutex held, but releases it temporarily. */
static void job_do_dismiss_locked(Job *job)
{
    assert(job);
    job->busy = false;
    job->paused = false;
    job->deferred_to_main_loop = true;

    job_txn_del_job_locked(job);

    job_state_transition_locked(job, JOB_STATUS_NULL);
    job_unref_locked(job);
}

void job_dismiss_locked(Job **jobptr, Error **errp)
{
    Job *job = *jobptr;
    /* similarly to _complete, this is QMP-interface only. */
    assert(job->id);
    if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) {
        return;
    }

    job_do_dismiss_locked(job);
    *jobptr = NULL;
}

void job_early_fail(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->status == JOB_STATUS_CREATED);
    job_do_dismiss_locked(job);
}

/* Called with job_mutex held. */
static void job_conclude_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_CONCLUDED);
    if (job->auto_dismiss || !job_started_locked(job)) {
        job_do_dismiss_locked(job);
    }
}
/* Called with job_mutex held. */
static void job_update_rc_locked(Job *job)
{
    if (!job->ret && job_is_cancelled_locked(job)) {
        job->ret = -ECANCELED;
    }
    if (job->ret) {
        if (!job->err) {
            error_setg(&job->err, "%s", strerror(-job->ret));
        }
        job_state_transition_locked(job, JOB_STATUS_ABORTING);
    }
}

static void job_commit(Job *job)
{
    assert(!job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->commit) {
        job->driver->commit(job);
    }
}

static void job_abort(Job *job)
{
    assert(job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->abort) {
        job->driver->abort(job);
    }
}

static void job_clean(Job *job)
{
    GLOBAL_STATE_CODE();
    if (job->driver->clean) {
        job->driver->clean(job);
    }
}
/*
 * Called with job_mutex held, but releases it temporarily.
 * Takes AioContext lock internally to invoke a job->driver callback.
 */
static int job_finalize_single_locked(Job *job)
{
    int job_ret;
    AioContext *ctx = job->aio_context;

    assert(job_is_completed_locked(job));

    /* Ensure abort is called for late-transactional failures */
    job_update_rc_locked(job);

    job_ret = job->ret;
    job_unlock();
    aio_context_acquire(ctx);

    if (!job_ret) {
        job_commit(job);
    } else {
        job_abort(job);
    }
    job_clean(job);

    if (job->cb) {
        job->cb(job->opaque, job_ret);
    }

    aio_context_release(ctx);
    job_lock();

    /* Emit events only if we actually started */
    if (job_started_locked(job)) {
        if (job_is_cancelled_locked(job)) {
            job_event_cancelled_locked(job);
        } else {
            job_event_completed_locked(job);
        }
    }

    job_txn_del_job_locked(job);
    job_conclude_locked(job);
    return 0;
}
/*
 * Called with job_mutex held, but releases it temporarily.
 * Takes AioContext lock internally to invoke a job->driver callback.
 */
static void job_cancel_async_locked(Job *job, bool force)
{
    AioContext *ctx = job->aio_context;
    GLOBAL_STATE_CODE();
    if (job->driver->cancel) {
        job_unlock();
        aio_context_acquire(ctx);
        force = job->driver->cancel(job, force);
        aio_context_release(ctx);
        job_lock();
    } else {
        /* No .cancel() means the job will behave as if force-cancelled */
        force = true;
    }

    if (job->user_paused) {
        /* Do not call job_enter here, the caller will handle it. */
        if (job->driver->user_resume) {
            job_unlock();
            job->driver->user_resume(job);
            job_lock();
        }
        job->user_paused = false;
        assert(job->pause_count > 0);
        job->pause_count--;
    }

    /*
     * Ignore soft cancel requests after the job is already done
     * (We will still invoke job->driver->cancel() above, but if the
     * job driver supports soft cancelling and the job is done, that
     * should be a no-op, too. We still call it so it can override
     * @force.)
     */
    if (force || !job->deferred_to_main_loop) {
        job->cancelled = true;
        /* To prevent 'force == false' overriding a previous 'force == true' */
        job->force_cancel |= force;
    }
}
/*
 * Called with job_mutex held, but releases it temporarily.
 * Takes AioContext lock internally to invoke a job->driver callback.
 */
static void job_completed_txn_abort_locked(Job *job)
{
    JobTxn *txn = job->txn;
    Job *other_job;

    if (txn->aborting) {
        /*
         * We are cancelled by another job, which will handle everything.
         */
        return;
    }
    txn->aborting = true;
    job_txn_ref_locked(txn);

    job_ref_locked(job);

    /* Other jobs are effectively cancelled by us, set the status for
     * them; this job, however, may or may not be cancelled, depending
     * on the caller, so leave it. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job != job) {
            /*
             * This is a transaction: If one job failed, no result will matter.
             * Therefore, pass force=true to terminate all other jobs as quickly
             * as possible.
             */
            job_cancel_async_locked(other_job, true);
        }
    }
    while (!QLIST_EMPTY(&txn->jobs)) {
        other_job = QLIST_FIRST(&txn->jobs);
        if (!job_is_completed_locked(other_job)) {
            assert(job_cancel_requested_locked(other_job));
            job_finish_sync_locked(other_job, NULL, NULL);
        }
        job_finalize_single_locked(other_job);
    }

    job_unref_locked(job);
    job_txn_unref_locked(txn);
}
/* Called with job_mutex held, but releases it temporarily */
static int job_prepare_locked(Job *job)
{
    int ret;
    AioContext *ctx = job->aio_context;

    GLOBAL_STATE_CODE();

    if (job->ret == 0 && job->driver->prepare) {
        job_unlock();
        aio_context_acquire(ctx);
        ret = job->driver->prepare(job);
        aio_context_release(ctx);
        job_lock();
        job->ret = ret;
        job_update_rc_locked(job);
    }

    return job->ret;
}

/* Called with job_mutex held */
static int job_needs_finalize_locked(Job *job)
{
    return !job->auto_finalize;
}

/* Called with job_mutex held */
static void job_do_finalize_locked(Job *job)
{
    int rc;
    assert(job && job->txn);

    /* prepare the transaction to complete */
    rc = job_txn_apply_locked(job, job_prepare_locked);
    if (rc) {
        job_completed_txn_abort_locked(job);
    } else {
        job_txn_apply_locked(job, job_finalize_single_locked);
    }
}

void job_finalize_locked(Job *job, Error **errp)
{
    assert(job && job->id);
    if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) {
        return;
    }
    job_do_finalize_locked(job);
}
/* Called with job_mutex held. */
static int job_transition_to_pending_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_PENDING);
    if (!job->auto_finalize) {
        job_event_pending_locked(job);
    }
    return 0;
}

void job_transition_to_ready(Job *job)
{
    JOB_LOCK_GUARD();
    job_state_transition_locked(job, JOB_STATUS_READY);
    job_event_ready_locked(job);
}

/* Called with job_mutex held. */
static void job_completed_txn_success_locked(Job *job)
{
    JobTxn *txn = job->txn;
    Job *other_job;

    job_state_transition_locked(job, JOB_STATUS_WAITING);

    /*
     * Successful completion, see if there are other running jobs in this
     * txn.
     */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (!job_is_completed_locked(other_job)) {
            return;
        }
        assert(other_job->ret == 0);
    }

    job_txn_apply_locked(job, job_transition_to_pending_locked);

    /* If no jobs need manual finalization, automatically do so */
    if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) {
        job_do_finalize_locked(job);
    }
}
/* Called with job_mutex held. */
static void job_completed_locked(Job *job)
{
    assert(job && job->txn && !job_is_completed_locked(job));

    job_update_rc_locked(job);
    trace_job_completed(job, job->ret);
    if (job->ret) {
        job_completed_txn_abort_locked(job);
    } else {
        job_completed_txn_success_locked(job);
    }
}

/**
 * Useful only as a type shim for aio_bh_schedule_oneshot.
 * Called with job_mutex *not* held.
 */
static void job_exit(void *opaque)
{
    Job *job = (Job *)opaque;
    JOB_LOCK_GUARD();
    job_ref_locked(job);

    /* This is a lie, we're not quiescent, but still doing the completion
     * callbacks. However, completion callbacks tend to involve operations that
     * drain block nodes, and if .drained_poll still returned true, we would
     * deadlock. */
    job->busy = false;
    job_event_idle_locked(job);

    job_completed_locked(job);
    job_unref_locked(job);
}
/**
 * All jobs must allow a pause point before entering their job proper. This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
static void coroutine_fn job_co_entry(void *opaque)
{
    Job *job = opaque;
    int ret;

    assert(job && job->driver && job->driver->run);
    WITH_JOB_LOCK_GUARD() {
        assert(job->aio_context == qemu_get_current_aio_context());
        job_pause_point_locked(job);
    }
    ret = job->driver->run(job, &job->err);
    WITH_JOB_LOCK_GUARD() {
        job->ret = ret;
        job->deferred_to_main_loop = true;
        job->busy = true;
    }
    aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
}

void job_start(Job *job)
{
    assert(qemu_in_main_thread());

    WITH_JOB_LOCK_GUARD() {
        assert(job && !job_started_locked(job) && job->paused &&
               job->driver && job->driver->run);
        job->co = qemu_coroutine_create(job_co_entry, job);
        job->pause_count--;
        job->busy = true;
        job->paused = false;
        job_state_transition_locked(job, JOB_STATUS_RUNNING);
    }
    aio_co_enter(job->aio_context, job->co);
}
void job_cancel_locked(Job *job, bool force)
{
    if (job->status == JOB_STATUS_CONCLUDED) {
        job_do_dismiss_locked(job);
        return;
    }
    job_cancel_async_locked(job, force);
    if (!job_started_locked(job)) {
        job_completed_locked(job);
    } else if (job->deferred_to_main_loop) {
        /*
         * job_cancel_async() ignores soft-cancel requests for jobs
         * that are already done (i.e. deferred to the main loop). We
         * have to check again whether the job is really cancelled.
         * (job_cancel_requested() and job_is_cancelled() are equivalent
         * here, because job_cancel_async() will make soft-cancel
         * requests no-ops when deferred_to_main_loop is true. We
         * choose to call job_is_cancelled() to show that we invoke
         * job_completed_txn_abort() only for force-cancelled jobs.)
         */
        if (job_is_cancelled_locked(job)) {
            job_completed_txn_abort_locked(job);
        }
    } else {
        job_enter_cond_locked(job, NULL);
    }
}

void job_user_cancel_locked(Job *job, bool force, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) {
        return;
    }
    job_cancel_locked(job, force);
}

/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may
 * be used with job_finish_sync_locked() without the need for (rather nasty)
 * function pointer casts there.
 *
 * Called with job_mutex held.
 */
static void job_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, false);
}

/**
 * Same as job_cancel_err(), but force-cancel.
 * Called with job_mutex held.
 */
static void job_force_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, true);
}
int job_cancel_sync_locked(Job *job, bool force)
{
    if (force) {
        return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL);
    } else {
        return job_finish_sync_locked(job, &job_cancel_err_locked, NULL);
    }
}

int job_cancel_sync(Job *job, bool force)
{
    JOB_LOCK_GUARD();
    return job_cancel_sync_locked(job, force);
}

void job_cancel_sync_all(void)
{
    Job *job;

    JOB_LOCK_GUARD();
    while ((job = job_next_locked(NULL))) {
        job_cancel_sync_locked(job, true);
    }
}
int job_complete_sync_locked(Job *job, Error **errp)
{
    return job_finish_sync_locked(job, job_complete_locked, errp);
}

void job_complete_locked(Job *job, Error **errp)
{
    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    GLOBAL_STATE_CODE();
    if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) {
        return;
    }
    if (job_cancel_requested_locked(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job_unlock();
    job->driver->complete(job, errp);
    job_lock();
}
int job_finish_sync_locked(Job *job,
                           void (*finish)(Job *, Error **errp),
                           Error **errp)
{
    Error *local_err = NULL;
    int ret;
    GLOBAL_STATE_CODE();

    job_ref_locked(job);

    if (finish) {
        finish(job, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        job_unref_locked(job);
        return -EBUSY;
    }

    job_unlock();
    AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
                            (job_enter(job), !job_is_completed(job)));
    job_lock();

    ret = (job_is_cancelled_locked(job) && job->ret == 0)
          ? -ECANCELED : job->ret;
    job_unref_locked(job);

    return ret;
}