job.c
/*
 * Background jobs (long-running operations)
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012, 2018 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/job.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
#include "block/aio-wait.h"
#include "trace/trace-root.h"
#include "qapi/qapi-events-job.h"
/*
 * The job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor. The monitor is
 * peculiar in that it accesses the job list with job_get, and
 * therefore needs consistency across job_get and the actual operation
 * (e.g. job_user_cancel). To achieve this consistency, the caller
 * calls job_lock/job_unlock itself around the whole operation.
 *
 * The second includes functions used by the job drivers and sometimes
 * by the core block layer. These delegate the locking to the callee instead.
 */
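
/*
 * A minimal sketch of the monitor-side pattern described above
 * (illustrative only; "job0" is a hypothetical job ID and errp a
 * caller-provided Error **):
 *
 *     job_lock();
 *     Job *job = job_get_locked("job0");
 *     if (job) {
 *         job_user_cancel_locked(job, true, errp);
 *     }
 *     job_unlock();
 */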

/*
 * job_mutex protects the jobs list, but also makes the
 * struct job fields thread-safe.
 */
QemuMutex job_mutex;

/* Protected by job_mutex */
static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs);

/* Job State Transition Table */
bool JobSTT[JOB_STATUS__MAX][JOB_STATUS__MAX] = {
                                    /* U, C, R, P, Y, S, W, D, X, E, N */
    /* U: */ [JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    /* C: */ [JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1},
    /* R: */ [JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0},
    /* P: */ [JOB_STATUS_PAUSED]    = {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
    /* Y: */ [JOB_STATUS_READY]     = {0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0},
    /* S: */ [JOB_STATUS_STANDBY]   = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
    /* W: */ [JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
    /* D: */ [JOB_STATUS_PENDING]   = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* X: */ [JOB_STATUS_ABORTING]  = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* E: */ [JOB_STATUS_CONCLUDED] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
    /* N: */ [JOB_STATUS_NULL]      = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
};
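
/*
 * Read the table as JobSTT[from][to]; for example,
 * JobSTT[JOB_STATUS_CREATED][JOB_STATUS_RUNNING] is 1, so a job may
 * move from CREATED to RUNNING, while the all-zero UNDEFINED column
 * means no state can ever transition back to UNDEFINED.
 */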

bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
                          /* U, C, R, P, Y, S, W, D, X, E, N */
    [JOB_VERB_CANCEL]    = {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0},
    [JOB_VERB_PAUSE]     = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_RESUME]    = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_COMPLETE]  = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_FINALIZE]  = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
    [JOB_VERB_DISMISS]   = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
    [JOB_VERB_CHANGE]    = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
};

/* Transactional group of jobs */
struct JobTxn {

    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, Job) jobs;

    /* Reference count */
    int refcnt;
};

void job_lock(void)
{
    qemu_mutex_lock(&job_mutex);
}

void job_unlock(void)
{
    qemu_mutex_unlock(&job_mutex);
}

static void __attribute__((__constructor__)) job_init(void)
{
    qemu_mutex_init(&job_mutex);
}

JobTxn *job_txn_new(void)
{
    JobTxn *txn = g_new0(JobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}

/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
{
    txn->refcnt++;
}

void job_txn_unref_locked(JobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}

void job_txn_unref(JobTxn *txn)
{
    JOB_LOCK_GUARD();
    job_txn_unref_locked(txn);
}
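
/*
 * A minimal sketch of the transaction lifecycle on the caller side
 * (illustrative only; the job_create() arguments are elided):
 *
 *     JobTxn *txn = job_txn_new();
 *     job_create(..., txn, ...);   // each job takes its own txn reference
 *     job_create(..., txn, ...);
 *     job_txn_unref(txn);          // drop the creator's initial reference
 */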

/**
 * @txn: The transaction (may be NULL)
 * @job: Job to add to the transaction
 *
 * Add @job to the transaction. The @job must not already be in a transaction.
 * The caller must call either job_txn_unref() or job_completed() to release
 * the reference that is automatically grabbed here.
 *
 * If @txn is NULL, the function does nothing.
 *
 * Called with job_mutex held.
 */
static void job_txn_add_job_locked(JobTxn *txn, Job *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;
    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    job_txn_ref_locked(txn);
}

/* Called with job_mutex held. */
static void job_txn_del_job_locked(Job *job)
{
    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        job_txn_unref_locked(job->txn);
        job->txn = NULL;
    }
}

/* Called with job_mutex held, but releases it temporarily. */
static int job_txn_apply_locked(Job *job, int fn(Job *))
{
    Job *other_job, *next;
    JobTxn *txn = job->txn;
    int rc = 0;

    /*
     * Similar to job_completed_txn_abort, we take each job's lock before
     * applying fn, but since we assume that outer_ctx is held by the caller,
     * we need to release it here to avoid holding the lock twice - which would
     * break AIO_WAIT_WHILE from within fn.
     */
    job_ref_locked(job);

    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        rc = fn(other_job);
        if (rc) {
            break;
        }
    }

    job_unref_locked(job);
    return rc;
}

bool job_is_internal(Job *job)
{
    return (job->id == NULL);
}

/* Called with job_mutex held. */
static void job_state_transition_locked(Job *job, JobStatus s1)
{
    JobStatus s0 = job->status;
    assert(s1 >= 0 && s1 < JOB_STATUS__MAX);
    trace_job_state_transition(job, job->ret,
                               JobSTT[s0][s1] ? "allowed" : "disallowed",
                               JobStatus_str(s0), JobStatus_str(s1));
    assert(JobSTT[s0][s1]);
    job->status = s1;

    if (!job_is_internal(job) && s1 != s0) {
        qapi_event_send_job_status_change(job->id, job->status);
    }
}

int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp)
{
    JobStatus s0 = job->status;
    assert(verb >= 0 && verb < JOB_VERB__MAX);
    trace_job_apply_verb(job, JobStatus_str(s0), JobVerb_str(verb),
                         JobVerbTable[verb][s0] ? "allowed" : "prohibited");
    if (JobVerbTable[verb][s0]) {
        return 0;
    }
    error_setg(errp, "Job '%s' in state '%s' cannot accept command verb '%s'",
               job->id, JobStatus_str(s0), JobVerb_str(verb));
    return -EPERM;
}

JobType job_type(const Job *job)
{
    return job->driver->job_type;
}

const char *job_type_str(const Job *job)
{
    return JobType_str(job_type(job));
}

bool job_is_cancelled_locked(Job *job)
{
    /* force_cancel may be true only if cancelled is true, too */
    assert(job->cancelled || !job->force_cancel);
    return job->force_cancel;
}

bool job_is_cancelled(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_cancelled_locked(job);
}

/* Called with job_mutex held. */
static bool job_cancel_requested_locked(Job *job)
{
    return job->cancelled;
}

bool job_cancel_requested(Job *job)
{
    JOB_LOCK_GUARD();
    return job_cancel_requested_locked(job);
}
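
/*
 * Note the asymmetry between the two predicates above:
 * job_cancel_requested() reports any cancel request, while
 * job_is_cancelled() reports only force-cancellation, i.e. whether the
 * job is expected to terminate without producing a result (a
 * soft-cancelled mirror job, for example, still completes gracefully).
 */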

bool job_is_ready_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return false;
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

bool job_is_ready(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_ready_locked(job);
}

bool job_is_completed_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return false;
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

static bool job_is_completed(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_completed_locked(job);
}

static bool job_started_locked(Job *job)
{
    return job->co;
}

/* Called with job_mutex held. */
static bool job_should_pause_locked(Job *job)
{
    return job->pause_count > 0;
}

Job *job_next_locked(Job *job)
{
    if (!job) {
        return QLIST_FIRST(&jobs);
    }
    return QLIST_NEXT(job, job_list);
}

Job *job_next(Job *job)
{
    JOB_LOCK_GUARD();
    return job_next_locked(job);
}

Job *job_get_locked(const char *id)
{
    Job *job;

    QLIST_FOREACH(job, &jobs, job_list) {
        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }

    return NULL;
}

void job_set_aio_context(Job *job, AioContext *ctx)
{
    /* protect against read in job_finish_sync_locked and job_start */
    GLOBAL_STATE_CODE();
    /* protect against read in job_do_yield_locked */
    JOB_LOCK_GUARD();
    /* ensure the job is quiescent while the AioContext is changed */
    assert(job->paused || job_is_completed_locked(job));
    job->aio_context = ctx;
}

/* Called with job_mutex *not* held. */
static void job_sleep_timer_cb(void *opaque)
{
    Job *job = opaque;

    job_enter(job);
}

void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
                 AioContext *ctx, int flags, BlockCompletionFunc *cb,
                 void *opaque, Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD();

    if (job_id) {
        if (flags & JOB_INTERNAL) {
            error_setg(errp, "Cannot specify job ID for internal job");
            return NULL;
        }
        if (!id_wellformed(job_id)) {
            error_setg(errp, "Invalid job ID '%s'", job_id);
            return NULL;
        }
        if (job_get_locked(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    } else if (!(flags & JOB_INTERNAL)) {
        error_setg(errp, "An explicit job ID is required");
        return NULL;
    }

    job = g_malloc0(driver->instance_size);
    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->refcnt        = 1;
    job->aio_context   = ctx;
    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;
    job->auto_finalize = !(flags & JOB_MANUAL_FINALIZE);
    job->auto_dismiss  = !(flags & JOB_MANUAL_DISMISS);
    job->cb            = cb;
    job->opaque        = opaque;

    progress_init(&job->progress);

    notifier_list_init(&job->on_finalize_cancelled);
    notifier_list_init(&job->on_finalize_completed);
    notifier_list_init(&job->on_pending);
    notifier_list_init(&job->on_ready);
    notifier_list_init(&job->on_idle);

    job_state_transition_locked(job, JOB_STATUS_CREATED);
    aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
                   QEMU_CLOCK_REALTIME, SCALE_NS,
                   job_sleep_timer_cb, job);

    QLIST_INSERT_HEAD(&jobs, job, job_list);

    /* Single jobs are modeled as single-job transactions for sake of
     * consolidating the job management logic */
    if (!txn) {
        txn = job_txn_new();
        job_txn_add_job_locked(txn, job);
        job_txn_unref_locked(txn);
    } else {
        job_txn_add_job_locked(txn, job);
    }

    return job;
}
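
/*
 * A minimal sketch of driver-side job creation (illustrative only; the
 * driver definition, its run callback and the "example" ID are
 * hypothetical, not part of this file):
 *
 *     static const JobDriver example_driver = {
 *         .instance_size = sizeof(Job),
 *         .job_type      = JOB_TYPE_BACKUP,
 *         .run           = example_run,
 *     };
 *
 *     Job *job = job_create("example", &example_driver, NULL,
 *                           qemu_get_aio_context(), JOB_DEFAULT,
 *                           NULL, NULL, errp);
 */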

void job_ref_locked(Job *job)
{
    ++job->refcnt;
}

void job_unref_locked(Job *job)
{
    GLOBAL_STATE_CODE();

    if (--job->refcnt == 0) {
        assert(job->status == JOB_STATUS_NULL);
        assert(!timer_pending(&job->sleep_timer));
        assert(!job->txn);

        if (job->driver->free) {
            job_unlock();
            job->driver->free(job);
            job_lock();
        }

        QLIST_REMOVE(job, job_list);

        progress_destroy(&job->progress);
        error_free(job->err);
        g_free(job->id);
        g_free(job);
    }
}

void job_progress_update(Job *job, uint64_t done)
{
    progress_work_done(&job->progress, done);
}

void job_progress_set_remaining(Job *job, uint64_t remaining)
{
    progress_set_remaining(&job->progress, remaining);
}

void job_progress_increase_remaining(Job *job, uint64_t delta)
{
    progress_increase_remaining(&job->progress, delta);
}

/**
 * To be called when a cancelled job is finalised.
 * Called with job_mutex held.
 */
static void job_event_cancelled_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_cancelled, job);
}

/**
 * To be called when a successfully completed job is finalised.
 * Called with job_mutex held.
 */
static void job_event_completed_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_completed, job);
}

/* Called with job_mutex held. */
static void job_event_pending_locked(Job *job)
{
    notifier_list_notify(&job->on_pending, job);
}

/* Called with job_mutex held. */
static void job_event_ready_locked(Job *job)
{
    notifier_list_notify(&job->on_ready, job);
}

/* Called with job_mutex held. */
static void job_event_idle_locked(Job *job)
{
    notifier_list_notify(&job->on_idle, job);
}

void job_enter_cond_locked(Job *job, bool (*fn)(Job *job))
{
    if (!job_started_locked(job)) {
        return;
    }
    if (job->deferred_to_main_loop) {
        return;
    }

    if (job->busy) {
        return;
    }

    if (fn && !fn(job)) {
        return;
    }

    assert(!job->deferred_to_main_loop);
    timer_del(&job->sleep_timer);
    job->busy = true;
    job_unlock();
    aio_co_wake(job->co);
    job_lock();
}

void job_enter(Job *job)
{
    JOB_LOCK_GUARD();
    job_enter_cond_locked(job, NULL);
}

/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
 * Reentering the job coroutine with job_enter() before the timer has expired
 * is allowed and cancels the timer.
 *
 * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be
 * called explicitly.
 *
 * Called with job_mutex held, but releases it temporarily.
 */
static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns)
{
    AioContext *next_aio_context;

    if (ns != -1) {
        timer_mod(&job->sleep_timer, ns);
    }
    job->busy = false;
    job_event_idle_locked(job);
    job_unlock();
    qemu_coroutine_yield();
    job_lock();

    next_aio_context = job->aio_context;
    /*
     * The coroutine has resumed, but in the meanwhile the job AioContext
     * might have changed via bdrv_try_change_aio_context(), so we need to
     * move the coroutine too, into the new AioContext.
     */
    while (qemu_get_current_aio_context() != next_aio_context) {
        job_unlock();
        aio_co_reschedule_self(next_aio_context);
        job_lock();
        next_aio_context = job->aio_context;
    }

    /* Set by job_enter_cond_locked() before re-entering the coroutine. */
    assert(job->busy);
}

/* Called with job_mutex held, but releases it temporarily. */
static void coroutine_fn job_pause_point_locked(Job *job)
{
    assert(job && job_started_locked(job));

    if (!job_should_pause_locked(job)) {
        return;
    }
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (job->driver->pause) {
        job_unlock();
        job->driver->pause(job);
        job_lock();
    }

    if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) {
        JobStatus status = job->status;
        job_state_transition_locked(job, status == JOB_STATUS_READY
                                         ? JOB_STATUS_STANDBY
                                         : JOB_STATUS_PAUSED);
        job->paused = true;
        job_do_yield_locked(job, -1);
        job->paused = false;
        job_state_transition_locked(job, status);
    }

    if (job->driver->resume) {
        job_unlock();
        job->driver->resume(job);
        job_lock();
    }
}

void coroutine_fn job_pause_point(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_point_locked(job);
}

void coroutine_fn job_yield(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, -1);
    }

    job_pause_point_locked(job);
}

void coroutine_fn job_sleep_ns(Job *job, int64_t ns)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
    }

    job_pause_point_locked(job);
}
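
/*
 * Together, job_sleep_ns() and job_pause_point() form the cooperative
 * scheduling core of a typical .run() callback. A minimal sketch of such
 * a loop (illustrative only; example_do_chunk() is hypothetical):
 *
 *     static int coroutine_fn example_run(Job *job, Error **errp)
 *     {
 *         while (!job_is_cancelled(job)) {
 *             if (example_do_chunk(job) == 0) {
 *                 return 0;                   // all work done
 *             }
 *             job_sleep_ns(job, 100000);      // throttle; also a pause point
 *         }
 *         return -ECANCELED;
 *     }
 */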

/* Assumes the job_mutex is held */
static bool job_timer_not_pending_locked(Job *job)
{
    return !timer_pending(&job->sleep_timer);
}

void job_pause_locked(Job *job)
{
    job->pause_count++;
    if (!job->paused) {
        job_enter_cond_locked(job, NULL);
    }
}

void job_pause(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_locked(job);
}

void job_resume_locked(Job *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }

    /* kick only if no timer is pending */
    job_enter_cond_locked(job, job_timer_not_pending_locked);
}

void job_resume(Job *job)
{
    JOB_LOCK_GUARD();
    job_resume_locked(job);
}

void job_user_pause_locked(Job *job, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) {
        return;
    }
    if (job->user_paused) {
        error_setg(errp, "Job is already paused");
        return;
    }
    job->user_paused = true;
    job_pause_locked(job);
}

bool job_user_paused_locked(Job *job)
{
    return job->user_paused;
}

void job_user_resume_locked(Job *job, Error **errp)
{
    assert(job);
    GLOBAL_STATE_CODE();

    if (!job->user_paused || job->pause_count <= 0) {
        error_setg(errp, "Can't resume a job that was not paused");
        return;
    }
    if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) {
        return;
    }
    if (job->driver->user_resume) {
        job_unlock();
        job->driver->user_resume(job);
        job_lock();
    }
    job->user_paused = false;
    job_resume_locked(job);
}
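
/*
 * Pausing is counted: internal pause requests (e.g. around drained
 * sections) and the user pause each increment pause_count, and the job
 * only resumes once every pause has been matched by a resume. The
 * user_paused flag merely remembers whether one of those counted pauses
 * belongs to the user.
 */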

/* Called with job_mutex held, but releases it temporarily. */
static void job_do_dismiss_locked(Job *job)
{
    assert(job);
    job->busy = false;
    job->paused = false;
    job->deferred_to_main_loop = true;

    job_txn_del_job_locked(job);

    job_state_transition_locked(job, JOB_STATUS_NULL);
    job_unref_locked(job);
}

void job_dismiss_locked(Job **jobptr, Error **errp)
{
    Job *job = *jobptr;
    /* similarly to _complete, this is QMP-interface only. */
    assert(job->id);
    if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) {
        return;
    }

    job_do_dismiss_locked(job);
    *jobptr = NULL;
}

void job_early_fail(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->status == JOB_STATUS_CREATED);
    job_do_dismiss_locked(job);
}

/* Called with job_mutex held. */
static void job_conclude_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_CONCLUDED);
    if (job->auto_dismiss || !job_started_locked(job)) {
        job_do_dismiss_locked(job);
    }
}

/* Called with job_mutex held. */
static void job_update_rc_locked(Job *job)
{
    if (!job->ret && job_is_cancelled_locked(job)) {
        job->ret = -ECANCELED;
    }
    if (job->ret) {
        if (!job->err) {
            error_setg(&job->err, "%s", strerror(-job->ret));
        }
        job_state_transition_locked(job, JOB_STATUS_ABORTING);
    }
}

static void job_commit(Job *job)
{
    assert(!job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->commit) {
        job->driver->commit(job);
    }
}

static void job_abort(Job *job)
{
    assert(job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->abort) {
        job->driver->abort(job);
    }
}

static void job_clean(Job *job)
{
    GLOBAL_STATE_CODE();
    if (job->driver->clean) {
        job->driver->clean(job);
    }
}

/*
 * Called with job_mutex held, but releases it temporarily.
 */
static int job_finalize_single_locked(Job *job)
{
    int job_ret;

    assert(job_is_completed_locked(job));

    /* Ensure abort is called for late-transactional failures */
    job_update_rc_locked(job);

    job_ret = job->ret;
    job_unlock();

    if (!job_ret) {
        job_commit(job);
    } else {
        job_abort(job);
    }
    job_clean(job);

    if (job->cb) {
        job->cb(job->opaque, job_ret);
    }

    job_lock();

    /* Emit events only if we actually started */
    if (job_started_locked(job)) {
        if (job_is_cancelled_locked(job)) {
            job_event_cancelled_locked(job);
        } else {
            job_event_completed_locked(job);
        }
    }

    job_txn_del_job_locked(job);
    job_conclude_locked(job);
    return 0;
}

/*
 * Called with job_mutex held, but releases it temporarily.
 */
static void job_cancel_async_locked(Job *job, bool force)
{
    GLOBAL_STATE_CODE();
    if (job->driver->cancel) {
        job_unlock();
        force = job->driver->cancel(job, force);
        job_lock();
    } else {
        /* No .cancel() means the job will behave as if force-cancelled */
        force = true;
    }

    if (job->user_paused) {
        /* Do not call job_enter here, the caller will handle it. */
        if (job->driver->user_resume) {
            job_unlock();
            job->driver->user_resume(job);
            job_lock();
        }
        job->user_paused = false;
        assert(job->pause_count > 0);
        job->pause_count--;
    }

    /*
     * Ignore soft cancel requests after the job is already done
     * (We will still invoke job->driver->cancel() above, but if the
     * job driver supports soft cancelling and the job is done, that
     * should be a no-op, too. We still call it so it can override
     * @force.)
     */
    if (force || !job->deferred_to_main_loop) {
        job->cancelled = true;
        /* To prevent 'force == false' overriding a previous 'force == true' */
        job->force_cancel |= force;
    }
}

/*
 * Called with job_mutex held, but releases it temporarily.
 */
static void job_completed_txn_abort_locked(Job *job)
{
    JobTxn *txn = job->txn;
    Job *other_job;

    if (txn->aborting) {
        /*
         * We are cancelled by another job, which will handle everything.
         */
        return;
    }
    txn->aborting = true;
    job_txn_ref_locked(txn);

    job_ref_locked(job);

    /* Other jobs are effectively cancelled by us, set the status for
     * them; this job, however, may or may not be cancelled, depending
     * on the caller, so leave it. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job != job) {
            /*
             * This is a transaction: If one job failed, no result will matter.
             * Therefore, pass force=true to terminate all other jobs as quickly
             * as possible.
             */
            job_cancel_async_locked(other_job, true);
        }
    }
    while (!QLIST_EMPTY(&txn->jobs)) {
        other_job = QLIST_FIRST(&txn->jobs);
        if (!job_is_completed_locked(other_job)) {
            assert(job_cancel_requested_locked(other_job));
            job_finish_sync_locked(other_job, NULL, NULL);
        }
        job_finalize_single_locked(other_job);
    }

    job_unref_locked(job);
    job_txn_unref_locked(txn);
}

/* Called with job_mutex held, but releases it temporarily */
static int job_prepare_locked(Job *job)
{
    int ret;

    GLOBAL_STATE_CODE();

    if (job->ret == 0 && job->driver->prepare) {
        job_unlock();
        ret = job->driver->prepare(job);
        job_lock();
        job->ret = ret;
        job_update_rc_locked(job);
    }

    return job->ret;
}

/* Called with job_mutex held */
static int job_needs_finalize_locked(Job *job)
{
    return !job->auto_finalize;
}

/* Called with job_mutex held */
static void job_do_finalize_locked(Job *job)
{
    int rc;
    assert(job && job->txn);

    /* prepare the transaction to complete */
    rc = job_txn_apply_locked(job, job_prepare_locked);
    if (rc) {
        job_completed_txn_abort_locked(job);
    } else {
        job_txn_apply_locked(job, job_finalize_single_locked);
    }
}

void job_finalize_locked(Job *job, Error **errp)
{
    assert(job && job->id);
    if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) {
        return;
    }
    job_do_finalize_locked(job);
}

/* Called with job_mutex held. */
static int job_transition_to_pending_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_PENDING);
    if (!job->auto_finalize) {
        job_event_pending_locked(job);
    }
    return 0;
}

void job_transition_to_ready(Job *job)
{
    JOB_LOCK_GUARD();
    job_state_transition_locked(job, JOB_STATUS_READY);
    job_event_ready_locked(job);
}

/* Called with job_mutex held. */
static void job_completed_txn_success_locked(Job *job)
{
    JobTxn *txn = job->txn;
    Job *other_job;

    job_state_transition_locked(job, JOB_STATUS_WAITING);

    /*
     * Successful completion, see if there are other running jobs in this
     * txn.
     */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (!job_is_completed_locked(other_job)) {
            return;
        }
        assert(other_job->ret == 0);
    }

    job_txn_apply_locked(job, job_transition_to_pending_locked);

    /* If no jobs need manual finalization, automatically do so */
    if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) {
        job_do_finalize_locked(job);
    }
}

/* Called with job_mutex held. */
static void job_completed_locked(Job *job)
{
    assert(job && job->txn && !job_is_completed_locked(job));

    job_update_rc_locked(job);
    trace_job_completed(job, job->ret);
    if (job->ret) {
        job_completed_txn_abort_locked(job);
    } else {
        job_completed_txn_success_locked(job);
    }
}

/**
 * Useful only as a type shim for aio_bh_schedule_oneshot.
 * Called with job_mutex *not* held.
 */
static void job_exit(void *opaque)
{
    Job *job = (Job *)opaque;
    JOB_LOCK_GUARD();
    job_ref_locked(job);

    /* This is a lie, we're not quiescent, but still doing the completion
     * callbacks. However, completion callbacks tend to involve operations that
     * drain block nodes, and if .drained_poll still returned true, we would
     * deadlock. */
    job->busy = false;
    job_event_idle_locked(job);

    job_completed_locked(job);
    job_unref_locked(job);
}

/**
 * All jobs must allow a pause point before entering their job proper. This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
static void coroutine_fn job_co_entry(void *opaque)
{
    Job *job = opaque;
    int ret;

    assert(job && job->driver && job->driver->run);
    WITH_JOB_LOCK_GUARD() {
        assert(job->aio_context == qemu_get_current_aio_context());
        job_pause_point_locked(job);
    }
    ret = job->driver->run(job, &job->err);
    WITH_JOB_LOCK_GUARD() {
        job->ret = ret;
        job->deferred_to_main_loop = true;
        job->busy = true;
    }
    aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
}

void job_start(Job *job)
{
    assert(qemu_in_main_thread());

    WITH_JOB_LOCK_GUARD() {
        assert(job && !job_started_locked(job) && job->paused &&
               job->driver && job->driver->run);
        job->co = qemu_coroutine_create(job_co_entry, job);
        job->pause_count--;
        job->busy = true;
        job->paused = false;
        job_state_transition_locked(job, JOB_STATUS_RUNNING);
    }
    aio_co_enter(job->aio_context, job->co);
}
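
/*
 * Note that job_create() initializes pause_count to 1 and job_start()
 * decrements it, so a job can never run before job_start(); and the
 * first thing the coroutine does in job_co_entry() is take a pause
 * point, in case someone paused the job between creation and start.
 */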

void job_cancel_locked(Job *job, bool force)
{
    if (job->status == JOB_STATUS_CONCLUDED) {
        job_do_dismiss_locked(job);
        return;
    }
    job_cancel_async_locked(job, force);
    if (!job_started_locked(job)) {
        job_completed_locked(job);
    } else if (job->deferred_to_main_loop) {
        /*
         * job_cancel_async() ignores soft-cancel requests for jobs
         * that are already done (i.e. deferred to the main loop). We
         * have to check again whether the job is really cancelled.
         * (job_cancel_requested() and job_is_cancelled() are equivalent
         * here, because job_cancel_async() will make soft-cancel
         * requests no-ops when deferred_to_main_loop is true. We
         * choose to call job_is_cancelled() to show that we invoke
         * job_completed_txn_abort() only for force-cancelled jobs.)
         */
        if (job_is_cancelled_locked(job)) {
            job_completed_txn_abort_locked(job);
        }
    } else {
        job_enter_cond_locked(job, NULL);
    }
}

void job_user_cancel_locked(Job *job, bool force, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) {
        return;
    }
    job_cancel_locked(job, force);
}

/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may
 * be used with job_finish_sync_locked() without the need for (rather nasty)
 * function pointer casts there.
 *
 * Called with job_mutex held.
 */
static void job_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, false);
}

/**
 * Same as job_cancel_err(), but force-cancel.
 * Called with job_mutex held.
 */
static void job_force_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, true);
}

int job_cancel_sync_locked(Job *job, bool force)
{
    if (force) {
        return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL);
    } else {
        return job_finish_sync_locked(job, &job_cancel_err_locked, NULL);
    }
}

int job_cancel_sync(Job *job, bool force)
{
    JOB_LOCK_GUARD();
    return job_cancel_sync_locked(job, force);
}

void job_cancel_sync_all(void)
{
    Job *job;

    JOB_LOCK_GUARD();
    while ((job = job_next_locked(NULL))) {
        job_cancel_sync_locked(job, true);
    }
}
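
/*
 * The loop above deliberately restarts from the list head each time:
 * force-cancelling a job eventually drops it from the jobs list (a job
 * that is already CONCLUDED is dismissed by job_cancel_locked()), so
 * every iteration makes progress until the list is empty.
 */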

int job_complete_sync_locked(Job *job, Error **errp)
{
    return job_finish_sync_locked(job, job_complete_locked, errp);
}

void job_complete_locked(Job *job, Error **errp)
{
    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    GLOBAL_STATE_CODE();

    if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) {
        return;
    }
    if (job_cancel_requested_locked(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job_unlock();
    job->driver->complete(job, errp);
    job_lock();
}

int job_finish_sync_locked(Job *job,
                           void (*finish)(Job *, Error **errp),
                           Error **errp)
{
    Error *local_err = NULL;
    int ret;

    GLOBAL_STATE_CODE();

    job_ref_locked(job);

    if (finish) {
        finish(job, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        job_unref_locked(job);
        return -EBUSY;
    }

    job_unlock();
    AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
                            (job_enter(job), !job_is_completed(job)));
    job_lock();

    ret = (job_is_cancelled_locked(job) && job->ret == 0)
          ? -ECANCELED : job->ret;
    job_unref_locked(job);

    return ret;
}