/*
 * Background jobs (long-running operations)
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012, 2018 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/job.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
#include "block/aio-wait.h"
#include "trace/trace-root.h"
#include "qapi/qapi-events-job.h"

/*
 * The job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor. The monitor is
 * peculiar in that it accesses the job list with job_get, and
 * therefore needs consistency across job_get and the actual operation
 * (e.g. job_user_cancel). To achieve this consistency, the caller
 * calls job_lock/job_unlock itself around the whole operation.
 *
 * The second includes functions used by the job drivers and sometimes
 * by the core block layer. These delegate the locking to the callee instead.
 */
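
/*
 * For illustration only (not part of the upstream file): a monitor-style
 * caller is expected to wrap the lookup and the operation in one critical
 * section, roughly like the hypothetical snippet below:
 *
 *     WITH_JOB_LOCK_GUARD() {
 *         Job *job = job_get_locked(id);
 *         if (job) {
 *             job_user_cancel_locked(job, force, errp);
 *         }
 *     }
 */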

/*
 * job_mutex protects the jobs list, but also makes the
 * struct job fields thread-safe.
 */
QemuMutex job_mutex;

/* Protected by job_mutex */
static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs);

/* Job State Transition Table */
bool JobSTT[JOB_STATUS__MAX][JOB_STATUS__MAX] = {
                                    /* U, C, R, P, Y, S, W, D, X, E, N */
    /* U: */ [JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    /* C: */ [JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1},
    /* R: */ [JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0},
    /* P: */ [JOB_STATUS_PAUSED]    = {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
    /* Y: */ [JOB_STATUS_READY]     = {0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0},
    /* S: */ [JOB_STATUS_STANDBY]   = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
    /* W: */ [JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0},
    /* D: */ [JOB_STATUS_PENDING]   = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* X: */ [JOB_STATUS_ABORTING]  = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0},
    /* E: */ [JOB_STATUS_CONCLUDED] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
    /* N: */ [JOB_STATUS_NULL]      = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
};

bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
                          /* U, C, R, P, Y, S, W, D, X, E, N */
    [JOB_VERB_CANCEL]    = {0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0},
    [JOB_VERB_PAUSE]     = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_RESUME]    = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_COMPLETE]  = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
    [JOB_VERB_FINALIZE]  = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
    [JOB_VERB_DISMISS]   = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
    [JOB_VERB_CHANGE]    = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
};
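
/*
 * Reading the tables (illustrative note, not in the upstream file):
 * JobSTT[from][to] is nonzero when the transition is legal, so e.g.
 * JobSTT[JOB_STATUS_CREATED][JOB_STATUS_RUNNING] == 1 while the reverse
 * transition is forbidden.  Likewise, JobVerbTable[verb][status] gates user
 * verbs: JobVerbTable[JOB_VERB_COMPLETE][JOB_STATUS_READY] == 1, so
 * job-complete is only accepted once a job has reached READY (or STANDBY).
 */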

/* Transactional group of jobs */
struct JobTxn {

    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, Job) jobs;

    /* Reference count */
    int refcnt;
};

void job_lock(void)
{
    qemu_mutex_lock(&job_mutex);
}

void job_unlock(void)
{
    qemu_mutex_unlock(&job_mutex);
}

static void __attribute__((__constructor__)) job_init(void)
{
    qemu_mutex_init(&job_mutex);
}

JobTxn *job_txn_new(void)
{
    JobTxn *txn = g_new0(JobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}
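
/*
 * Illustrative sketch (not part of the upstream file): a caller grouping two
 * jobs so that they complete or abort together might do, schematically:
 *
 *     JobTxn *txn = job_txn_new();
 *     job1 = job_create(id1, driver, txn, ctx, flags, cb, opaque, errp);
 *     job2 = job_create(id2, driver, txn, ctx, flags, cb, opaque, errp);
 *     job_txn_unref(txn);
 *
 * All names above are placeholders.  job_create() grabs a reference to @txn
 * for each job it adds, so the creator can drop its own reference right away.
 */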

/* Called with job_mutex held. */
static void job_txn_ref_locked(JobTxn *txn)
{
    txn->refcnt++;
}

void job_txn_unref_locked(JobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}

void job_txn_unref(JobTxn *txn)
{
    JOB_LOCK_GUARD();
    job_txn_unref_locked(txn);
}

/**
 * @txn: The transaction (may be NULL)
 * @job: Job to add to the transaction
 *
 * Add @job to the transaction. The @job must not already be in a transaction.
 * The caller must call either job_txn_unref() or job_completed() to release
 * the reference that is automatically grabbed here.
 *
 * If @txn is NULL, the function does nothing.
 *
 * Called with job_mutex held.
 */
static void job_txn_add_job_locked(JobTxn *txn, Job *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;
    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    job_txn_ref_locked(txn);
}

/* Called with job_mutex held. */
static void job_txn_del_job_locked(Job *job)
{
    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        job_txn_unref_locked(job->txn);
        job->txn = NULL;
    }
}

/* Called with job_mutex held, but releases it temporarily. */
static int job_txn_apply_locked(Job *job, int fn(Job *))
{
    Job *other_job, *next;
    JobTxn *txn = job->txn;
    int rc = 0;

    /*
     * Similar to job_completed_txn_abort, we take each job's lock before
     * applying fn, but since we assume that outer_ctx is held by the caller,
     * we need to release it here to avoid holding the lock twice - which would
     * break AIO_WAIT_WHILE from within fn.
     */
    job_ref_locked(job);

    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        rc = fn(other_job);
        if (rc) {
            break;
        }
    }

    job_unref_locked(job);
    return rc;
}

bool job_is_internal(Job *job)
{
    return (job->id == NULL);
}

/* Called with job_mutex held. */
static void job_state_transition_locked(Job *job, JobStatus s1)
{
    JobStatus s0 = job->status;
    assert(s1 >= 0 && s1 < JOB_STATUS__MAX);
    trace_job_state_transition(job, job->ret,
                               JobSTT[s0][s1] ? "allowed" : "disallowed",
                               JobStatus_str(s0), JobStatus_str(s1));
    assert(JobSTT[s0][s1]);
    job->status = s1;

    if (!job_is_internal(job) && s1 != s0) {
        qapi_event_send_job_status_change(job->id, job->status);
    }
}

int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp)
{
    JobStatus s0 = job->status;
    assert(verb >= 0 && verb < JOB_VERB__MAX);
    trace_job_apply_verb(job, JobStatus_str(s0), JobVerb_str(verb),
                         JobVerbTable[verb][s0] ? "allowed" : "prohibited");
    if (JobVerbTable[verb][s0]) {
        return 0;
    }
    error_setg(errp, "Job '%s' in state '%s' cannot accept command verb '%s'",
               job->id, JobStatus_str(s0), JobVerb_str(verb));
    return -EPERM;
}

JobType job_type(const Job *job)
{
    return job->driver->job_type;
}

const char *job_type_str(const Job *job)
{
    return JobType_str(job_type(job));
}

bool job_is_cancelled_locked(Job *job)
{
    /* force_cancel may be true only if cancelled is true, too */
    assert(job->cancelled || !job->force_cancel);
    return job->force_cancel;
}

bool job_is_paused(Job *job)
{
    JOB_LOCK_GUARD();
    return job->paused;
}

bool job_is_cancelled(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_cancelled_locked(job);
}

/* Called with job_mutex held. */
static bool job_cancel_requested_locked(Job *job)
{
    return job->cancelled;
}

bool job_cancel_requested(Job *job)
{
    JOB_LOCK_GUARD();
    return job_cancel_requested_locked(job);
}

bool job_is_ready_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return false;
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

bool job_is_ready(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_ready_locked(job);
}

bool job_is_completed_locked(Job *job)
{
    switch (job->status) {
    case JOB_STATUS_UNDEFINED:
    case JOB_STATUS_CREATED:
    case JOB_STATUS_RUNNING:
    case JOB_STATUS_PAUSED:
    case JOB_STATUS_READY:
    case JOB_STATUS_STANDBY:
        return false;
    case JOB_STATUS_WAITING:
    case JOB_STATUS_PENDING:
    case JOB_STATUS_ABORTING:
    case JOB_STATUS_CONCLUDED:
    case JOB_STATUS_NULL:
        return true;
    default:
        g_assert_not_reached();
    }
    return false;
}

static bool job_is_completed(Job *job)
{
    JOB_LOCK_GUARD();
    return job_is_completed_locked(job);
}

static bool job_started_locked(Job *job)
{
    return job->co;
}

/* Called with job_mutex held. */
static bool job_should_pause_locked(Job *job)
{
    return job->pause_count > 0;
}

Job *job_next_locked(Job *job)
{
    if (!job) {
        return QLIST_FIRST(&jobs);
    }
    return QLIST_NEXT(job, job_list);
}

Job *job_next(Job *job)
{
    JOB_LOCK_GUARD();
    return job_next_locked(job);
}

Job *job_get_locked(const char *id)
{
    Job *job;

    QLIST_FOREACH(job, &jobs, job_list) {
        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }

    return NULL;
}

void job_set_aio_context(Job *job, AioContext *ctx)
{
    /* protect against read in job_finish_sync_locked and job_start */
    GLOBAL_STATE_CODE();
    /* protect against read in job_do_yield_locked */
    JOB_LOCK_GUARD();
    /* ensure the job is quiescent while the AioContext is changed */
    assert(job->paused || job_is_completed_locked(job));
    job->aio_context = ctx;
}

/* Called with job_mutex *not* held. */
static void job_sleep_timer_cb(void *opaque)
{
    Job *job = opaque;

    job_enter(job);
}

void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
                 AioContext *ctx, int flags, BlockCompletionFunc *cb,
                 void *opaque, Error **errp)
{
    Job *job;

    JOB_LOCK_GUARD();

    if (job_id) {
        if (flags & JOB_INTERNAL) {
            error_setg(errp, "Cannot specify job ID for internal job");
            return NULL;
        }
        if (!id_wellformed(job_id)) {
            error_setg(errp, "Invalid job ID '%s'", job_id);
            return NULL;
        }
        if (job_get_locked(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    } else if (!(flags & JOB_INTERNAL)) {
        error_setg(errp, "An explicit job ID is required");
        return NULL;
    }

    job = g_malloc0(driver->instance_size);
    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->refcnt        = 1;
    job->aio_context   = ctx;
    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;
    job->auto_finalize = !(flags & JOB_MANUAL_FINALIZE);
    job->auto_dismiss  = !(flags & JOB_MANUAL_DISMISS);
    job->cb            = cb;
    job->opaque        = opaque;

    progress_init(&job->progress);

    notifier_list_init(&job->on_finalize_cancelled);
    notifier_list_init(&job->on_finalize_completed);
    notifier_list_init(&job->on_pending);
    notifier_list_init(&job->on_ready);
    notifier_list_init(&job->on_idle);

    job_state_transition_locked(job, JOB_STATUS_CREATED);
    aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
                   QEMU_CLOCK_REALTIME, SCALE_NS,
                   job_sleep_timer_cb, job);

    QLIST_INSERT_HEAD(&jobs, job, job_list);

    /* Single jobs are modeled as single-job transactions for sake of
     * consolidating the job management logic */
    if (!txn) {
        txn = job_txn_new();
        job_txn_add_job_locked(txn, job);
        job_txn_unref_locked(txn);
    } else {
        job_txn_add_job_locked(txn, job);
    }

    return job;
}
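
/*
 * Illustrative sketch (not part of the upstream file): a minimal job driver
 * only needs instance_size, job_type and run.  Hypothetical example:
 *
 *     static int coroutine_fn noop_run(Job *job, Error **errp)
 *     {
 *         return 0;   // no work: completes immediately with success
 *     }
 *
 *     static const JobDriver noop_driver = {
 *         .instance_size = sizeof(Job),
 *         .job_type      = JOB_TYPE_CREATE,  // placeholder type
 *         .run           = noop_run,
 *     };
 *
 *     job = job_create("noop0", &noop_driver, NULL, qemu_get_aio_context(),
 *                      JOB_DEFAULT, NULL, NULL, errp);
 */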

void job_ref_locked(Job *job)
{
    ++job->refcnt;
}

void job_unref_locked(Job *job)
{
    GLOBAL_STATE_CODE();

    if (--job->refcnt == 0) {
        assert(job->status == JOB_STATUS_NULL);
        assert(!timer_pending(&job->sleep_timer));
        assert(!job->txn);

        if (job->driver->free) {
            job_unlock();
            job->driver->free(job);
            job_lock();
        }

        QLIST_REMOVE(job, job_list);

        progress_destroy(&job->progress);
        error_free(job->err);
        g_free(job->id);
        g_free(job);
    }
}

void job_progress_update(Job *job, uint64_t done)
{
    progress_work_done(&job->progress, done);
}

void job_progress_set_remaining(Job *job, uint64_t remaining)
{
    progress_set_remaining(&job->progress, remaining);
}

void job_progress_increase_remaining(Job *job, uint64_t delta)
{
    progress_increase_remaining(&job->progress, delta);
}

/**
 * To be called when a cancelled job is finalised.
 * Called with job_mutex held.
 */
static void job_event_cancelled_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_cancelled, job);
}

/**
 * To be called when a successfully completed job is finalised.
 * Called with job_mutex held.
 */
static void job_event_completed_locked(Job *job)
{
    notifier_list_notify(&job->on_finalize_completed, job);
}

/* Called with job_mutex held. */
static void job_event_pending_locked(Job *job)
{
    notifier_list_notify(&job->on_pending, job);
}

/* Called with job_mutex held. */
static void job_event_ready_locked(Job *job)
{
    notifier_list_notify(&job->on_ready, job);
}

/* Called with job_mutex held. */
static void job_event_idle_locked(Job *job)
{
    notifier_list_notify(&job->on_idle, job);
}

void job_enter_cond_locked(Job *job, bool (*fn)(Job *job))
{
    if (!job_started_locked(job)) {
        return;
    }
    if (job->deferred_to_main_loop) {
        return;
    }

    if (job->busy) {
        return;
    }

    if (fn && !fn(job)) {
        return;
    }

    assert(!job->deferred_to_main_loop);
    timer_del(&job->sleep_timer);
    job->busy = true;
    job_unlock();
    aio_co_wake(job->co);
    job_lock();
}

void job_enter(Job *job)
{
    JOB_LOCK_GUARD();
    job_enter_cond_locked(job, NULL);
}

/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
 * Reentering the job coroutine with job_enter() before the timer has expired
 * is allowed and cancels the timer.
 *
 * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be
 * called explicitly.
 *
 * Called with job_mutex held, but releases it temporarily.
 */
static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns)
{
    AioContext *next_aio_context;

    if (ns != -1) {
        timer_mod(&job->sleep_timer, ns);
    }
    job->busy = false;
    job_event_idle_locked(job);
    job_unlock();
    qemu_coroutine_yield();
    job_lock();

    next_aio_context = job->aio_context;
    /*
     * Coroutine has resumed, but in the meanwhile the job AioContext
     * might have changed via bdrv_try_change_aio_context(), so we need to move
     * the coroutine too in the new aiocontext.
     */
    while (qemu_get_current_aio_context() != next_aio_context) {
        job_unlock();
        aio_co_reschedule_self(next_aio_context);
        job_lock();
        next_aio_context = job->aio_context;
    }

    /* Set by job_enter_cond_locked() before re-entering the coroutine. */
    assert(job->busy);
}

/* Called with job_mutex held, but releases it temporarily. */
static void coroutine_fn job_pause_point_locked(Job *job)
{
    assert(job && job_started_locked(job));

    if (!job_should_pause_locked(job)) {
        return;
    }
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (job->driver->pause) {
        job_unlock();
        job->driver->pause(job);
        job_lock();
    }

    if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) {
        JobStatus status = job->status;
        job_state_transition_locked(job, status == JOB_STATUS_READY
                                         ? JOB_STATUS_STANDBY
                                         : JOB_STATUS_PAUSED);
        job->paused = true;
        job_do_yield_locked(job, -1);
        job->paused = false;
        job_state_transition_locked(job, status);
    }

    if (job->driver->resume) {
        job_unlock();
        job->driver->resume(job);
        job_lock();
    }
}

void coroutine_fn job_pause_point(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_point_locked(job);
}

void coroutine_fn job_yield(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, -1);
    }

    job_pause_point_locked(job);
}

void coroutine_fn job_sleep_ns(Job *job, int64_t ns)
{
    JOB_LOCK_GUARD();
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too! */
    if (job_is_cancelled_locked(job)) {
        return;
    }

    if (!job_should_pause_locked(job)) {
        job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
    }

    job_pause_point_locked(job);
}
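
/*
 * Illustrative sketch (not part of the upstream file): a typical .run
 * implementation cooperates with the machinery above by reporting progress
 * and sleeping between units of work, e.g. (hypothetical helper names):
 *
 *     static int coroutine_fn example_run(Job *job, Error **errp)
 *     {
 *         job_progress_set_remaining(job, total_work);
 *         while (!job_is_cancelled(job) && work_left()) {
 *             do_one_chunk();                   // driver-specific work
 *             job_progress_update(job, chunk);  // report progress
 *             job_sleep_ns(job, delay_ns);      // yields; honours pause/cancel
 *         }
 *         return 0;
 *     }
 */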

/* Assumes the job_mutex is held */
static bool job_timer_not_pending_locked(Job *job)
{
    return !timer_pending(&job->sleep_timer);
}

void job_pause_locked(Job *job)
{
    job->pause_count++;
    if (!job->paused) {
        job_enter_cond_locked(job, NULL);
    }
}

void job_pause(Job *job)
{
    JOB_LOCK_GUARD();
    job_pause_locked(job);
}

void job_resume_locked(Job *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }

    /* kick only if no timer is pending */
    job_enter_cond_locked(job, job_timer_not_pending_locked);
}

void job_resume(Job *job)
{
    JOB_LOCK_GUARD();
    job_resume_locked(job);
}

void job_user_pause_locked(Job *job, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) {
        return;
    }
    if (job->user_paused) {
        error_setg(errp, "Job is already paused");
        return;
    }

    job->user_paused = true;
    job_pause_locked(job);
}

bool job_user_paused_locked(Job *job)
{
    return job->user_paused;
}

void job_user_resume_locked(Job *job, Error **errp)
{
    assert(job);
    GLOBAL_STATE_CODE();

    if (!job->user_paused || job->pause_count <= 0) {
        error_setg(errp, "Can't resume a job that was not paused");
        return;
    }
    if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) {
        return;
    }
    if (job->driver->user_resume) {
        job_unlock();
        job->driver->user_resume(job);
        job_lock();
    }

    job->user_paused = false;
    job_resume_locked(job);
}

/* Called with job_mutex held, but releases it temporarily. */
static void job_do_dismiss_locked(Job *job)
{
    assert(job);
    job->busy = false;
    job->paused = false;
    job->deferred_to_main_loop = true;

    job_txn_del_job_locked(job);

    job_state_transition_locked(job, JOB_STATUS_NULL);
    job_unref_locked(job);
}

void job_dismiss_locked(Job **jobptr, Error **errp)
{
    Job *job = *jobptr;
    /* similarly to _complete, this is QMP-interface only. */
    assert(job->id);
    if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) {
        return;
    }

    job_do_dismiss_locked(job);
    *jobptr = NULL;
}

void job_early_fail(Job *job)
{
    JOB_LOCK_GUARD();
    assert(job->status == JOB_STATUS_CREATED);
    job_do_dismiss_locked(job);
}

/* Called with job_mutex held. */
static void job_conclude_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_CONCLUDED);
    if (job->auto_dismiss || !job_started_locked(job)) {
        job_do_dismiss_locked(job);
    }
}

/* Called with job_mutex held. */
static void job_update_rc_locked(Job *job)
{
    if (!job->ret && job_is_cancelled_locked(job)) {
        job->ret = -ECANCELED;
    }
    if (job->ret) {
        if (!job->err) {
            error_setg(&job->err, "%s", strerror(-job->ret));
        }
        job_state_transition_locked(job, JOB_STATUS_ABORTING);
    }
}

static void job_commit(Job *job)
{
    assert(!job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->commit) {
        job->driver->commit(job);
    }
}

static void job_abort(Job *job)
{
    assert(job->ret);
    GLOBAL_STATE_CODE();
    if (job->driver->abort) {
        job->driver->abort(job);
    }
}

static void job_clean(Job *job)
{
    GLOBAL_STATE_CODE();
    if (job->driver->clean) {
        job->driver->clean(job);
    }
}

/*
 * Called with job_mutex held, but releases it temporarily.
 */
static int job_finalize_single_locked(Job *job)
{
    int job_ret;

    assert(job_is_completed_locked(job));

    /* Ensure abort is called for late-transactional failures */
    job_update_rc_locked(job);

    job_ret = job->ret;
    job_unlock();

    if (!job_ret) {
        job_commit(job);
    } else {
        job_abort(job);
    }
    job_clean(job);

    if (job->cb) {
        job->cb(job->opaque, job_ret);
    }

    job_lock();

    /* Emit events only if we actually started */
    if (job_started_locked(job)) {
        if (job_is_cancelled_locked(job)) {
            job_event_cancelled_locked(job);
        } else {
            job_event_completed_locked(job);
        }
    }

    job_txn_del_job_locked(job);
    job_conclude_locked(job);
    return 0;
}

/*
 * Called with job_mutex held, but releases it temporarily.
 */
static void job_cancel_async_locked(Job *job, bool force)
{
    GLOBAL_STATE_CODE();
    if (job->driver->cancel) {
        job_unlock();
        force = job->driver->cancel(job, force);
        job_lock();
    } else {
        /* No .cancel() means the job will behave as if force-cancelled */
        force = true;
    }

    if (job->user_paused) {
        /* Do not call job_enter here, the caller will handle it. */
        if (job->driver->user_resume) {
            job_unlock();
            job->driver->user_resume(job);
            job_lock();
        }
        job->user_paused = false;
        assert(job->pause_count > 0);
        job->pause_count--;
    }

    /*
     * Ignore soft cancel requests after the job is already done
     * (We will still invoke job->driver->cancel() above, but if the
     * job driver supports soft cancelling and the job is done, that
     * should be a no-op, too. We still call it so it can override
     * @force.)
     */
    if (force || !job->deferred_to_main_loop) {
        job->cancelled = true;
        /* To prevent 'force == false' overriding a previous 'force == true' */
        job->force_cancel |= force;
    }
}

/*
 * Called with job_mutex held, but releases it temporarily.
 */
static void job_completed_txn_abort_locked(Job *job)
{
    JobTxn *txn = job->txn;
    Job *other_job;

    if (txn->aborting) {
        /*
         * We are cancelled by another job, which will handle everything.
         */
        return;
    }
    txn->aborting = true;
    job_txn_ref_locked(txn);

    job_ref_locked(job);

    /* Other jobs are effectively cancelled by us, set the status for
     * them; this job, however, may or may not be cancelled, depending
     * on the caller, so leave it. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job != job) {
            /*
             * This is a transaction: If one job failed, no result will matter.
             * Therefore, pass force=true to terminate all other jobs as quickly
             * as possible.
             */
            job_cancel_async_locked(other_job, true);
        }
    }
    while (!QLIST_EMPTY(&txn->jobs)) {
        other_job = QLIST_FIRST(&txn->jobs);
        if (!job_is_completed_locked(other_job)) {
            assert(job_cancel_requested_locked(other_job));
            job_finish_sync_locked(other_job, NULL, NULL);
        }
        job_finalize_single_locked(other_job);
    }

    job_unref_locked(job);
    job_txn_unref_locked(txn);
}

/* Called with job_mutex held, but releases it temporarily */
static int job_prepare_locked(Job *job)
{
    int ret;

    GLOBAL_STATE_CODE();

    if (job->ret == 0 && job->driver->prepare) {
        job_unlock();
        ret = job->driver->prepare(job);
        job_lock();
        job->ret = ret;
        job_update_rc_locked(job);
    }

    return job->ret;
}

/* Called with job_mutex held */
static int job_needs_finalize_locked(Job *job)
{
    return !job->auto_finalize;
}

/* Called with job_mutex held */
static void job_do_finalize_locked(Job *job)
{
    int rc;
    assert(job && job->txn);

    /* prepare the transaction to complete */
    rc = job_txn_apply_locked(job, job_prepare_locked);
    if (rc) {
        job_completed_txn_abort_locked(job);
    } else {
        job_txn_apply_locked(job, job_finalize_single_locked);
    }
}

void job_finalize_locked(Job *job, Error **errp)
{
    assert(job && job->id);
    if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) {
        return;
    }
    job_do_finalize_locked(job);
}

/* Called with job_mutex held. */
static int job_transition_to_pending_locked(Job *job)
{
    job_state_transition_locked(job, JOB_STATUS_PENDING);
    if (!job->auto_finalize) {
        job_event_pending_locked(job);
    }
    return 0;
}

void job_transition_to_ready(Job *job)
{
    JOB_LOCK_GUARD();
    job_state_transition_locked(job, JOB_STATUS_READY);
    job_event_ready_locked(job);
}
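
/*
 * Illustrative note (not part of the upstream file): long-running drivers
 * such as mirror call job_transition_to_ready() once they reach a steady
 * state.  From READY the user may issue job-complete, which lands in
 * job_complete_locked() below and invokes the driver's .complete callback.
 */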

/* Called with job_mutex held. */
static void job_completed_txn_success_locked(Job *job)
{
    JobTxn *txn = job->txn;
    Job *other_job;

    job_state_transition_locked(job, JOB_STATUS_WAITING);

    /*
     * Successful completion, see if there are other running jobs in this
     * txn.
     */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (!job_is_completed_locked(other_job)) {
            return;
        }
        assert(other_job->ret == 0);
    }

    job_txn_apply_locked(job, job_transition_to_pending_locked);

    /* If no jobs need manual finalization, automatically do so */
    if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) {
        job_do_finalize_locked(job);
    }
}

/* Called with job_mutex held. */
static void job_completed_locked(Job *job)
{
    assert(job && job->txn && !job_is_completed_locked(job));

    job_update_rc_locked(job);
    trace_job_completed(job, job->ret);
    if (job->ret) {
        job_completed_txn_abort_locked(job);
    } else {
        job_completed_txn_success_locked(job);
    }
}

/**
 * Useful only as a type shim for aio_bh_schedule_oneshot.
 * Called with job_mutex *not* held.
 */
static void job_exit(void *opaque)
{
    Job *job = (Job *)opaque;
    JOB_LOCK_GUARD();

    job_ref_locked(job);

    /* This is a lie, we're not quiescent, but still doing the completion
     * callbacks. However, completion callbacks tend to involve operations that
     * drain block nodes, and if .drained_poll still returned true, we would
     * deadlock. */
    job->busy = false;
    job_event_idle_locked(job);

    job_completed_locked(job);
    job_unref_locked(job);
}

/**
 * All jobs must allow a pause point before entering their job proper. This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
static void coroutine_fn job_co_entry(void *opaque)
{
    Job *job = opaque;
    int ret;

    assert(job && job->driver && job->driver->run);
    WITH_JOB_LOCK_GUARD() {
        assert(job->aio_context == qemu_get_current_aio_context());
        job_pause_point_locked(job);
    }
    ret = job->driver->run(job, &job->err);
    WITH_JOB_LOCK_GUARD() {
        job->ret = ret;
        job->deferred_to_main_loop = true;
        job->busy = true;
    }
    aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
}

void job_start(Job *job)
{
    assert(qemu_in_main_thread());

    WITH_JOB_LOCK_GUARD() {
        assert(job && !job_started_locked(job) && job->paused &&
               job->driver && job->driver->run);
        job->co = qemu_coroutine_create(job_co_entry, job);
        job->pause_count--;
        job->busy = true;
        job->paused = false;
        job_state_transition_locked(job, JOB_STATUS_RUNNING);
    }
    aio_co_enter(job->aio_context, job->co);
}
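
/*
 * Illustrative sketch (not part of the upstream file): the usual lifecycle
 * as driven by a caller, with placeholder names:
 *
 *     job = job_create("example0", &example_driver, NULL, ctx,
 *                      JOB_DEFAULT, NULL, NULL, &err);
 *     if (job) {
 *         job_start(job);   // CREATED -> RUNNING, enters the .run coroutine
 *     }
 */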

void job_cancel_locked(Job *job, bool force)
{
    if (job->status == JOB_STATUS_CONCLUDED) {
        job_do_dismiss_locked(job);
        return;
    }
    job_cancel_async_locked(job, force);
    if (!job_started_locked(job)) {
        job_completed_locked(job);
    } else if (job->deferred_to_main_loop) {
        /*
         * job_cancel_async() ignores soft-cancel requests for jobs
         * that are already done (i.e. deferred to the main loop). We
         * have to check again whether the job is really cancelled.
         * (job_cancel_requested() and job_is_cancelled() are equivalent
         * here, because job_cancel_async() will make soft-cancel
         * requests no-ops when deferred_to_main_loop is true. We
         * choose to call job_is_cancelled() to show that we invoke
         * job_completed_txn_abort() only for force-cancelled jobs.)
         */
        if (job_is_cancelled_locked(job)) {
            job_completed_txn_abort_locked(job);
        }
    } else {
        job_enter_cond_locked(job, NULL);
    }
}

void job_user_cancel_locked(Job *job, bool force, Error **errp)
{
    if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) {
        return;
    }
    job_cancel_locked(job, force);
}

/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may
 * be used with job_finish_sync_locked() without the need for (rather nasty)
 * function pointer casts there.
 *
 * Called with job_mutex held.
 */
static void job_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, false);
}

/**
 * Same as job_cancel_err(), but force-cancel.
 * Called with job_mutex held.
 */
static void job_force_cancel_err_locked(Job *job, Error **errp)
{
    job_cancel_locked(job, true);
}

int job_cancel_sync_locked(Job *job, bool force)
{
    if (force) {
        return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL);
    } else {
        return job_finish_sync_locked(job, &job_cancel_err_locked, NULL);
    }
}

int job_cancel_sync(Job *job, bool force)
{
    JOB_LOCK_GUARD();
    return job_cancel_sync_locked(job, force);
}

void job_cancel_sync_all(void)
{
    Job *job;
    JOB_LOCK_GUARD();

    while ((job = job_next_locked(NULL))) {
        job_cancel_sync_locked(job, true);
    }
}
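
/*
 * Illustrative note (not part of the upstream file): job_cancel_sync_all()
 * force-cancels whatever jobs remain and suits shutdown-style paths, where
 * no job may outlive the main loop.  force=true is used because at that
 * point there is no value in soft-cancel semantics (e.g. a mirror job
 * completing its final synchronisation pass).
 */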

int job_complete_sync_locked(Job *job, Error **errp)
{
    return job_finish_sync_locked(job, job_complete_locked, errp);
}

void job_complete_locked(Job *job, Error **errp)
{
    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    GLOBAL_STATE_CODE();
    if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) {
        return;
    }
    if (job_cancel_requested_locked(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job_unlock();
    job->driver->complete(job, errp);
    job_lock();
}

int job_finish_sync_locked(Job *job,
                           void (*finish)(Job *, Error **errp),
                           Error **errp)
{
    Error *local_err = NULL;
    int ret;
    GLOBAL_STATE_CODE();

    job_ref_locked(job);

    if (finish) {
        finish(job, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        job_unref_locked(job);
        return -EBUSY;
    }

    job_unlock();
    AIO_WAIT_WHILE_UNLOCKED(job->aio_context,
                            (job_enter(job), !job_is_completed(job)));
    job_lock();

    ret = (job_is_cancelled_locked(job) && job->ret == 0)
          ? -ECANCELED : job->ret;
    job_unref_locked(job);
    return ret;
}
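
/*
 * Illustrative sketch (not part of the upstream file): a caller wanting to
 * drive a READY job to completion and wait for the result might do, under
 * the job lock:
 *
 *     int ret;
 *     WITH_JOB_LOCK_GUARD() {
 *         ret = job_complete_sync_locked(job, &err);
 *     }
 *     // ret is the job's return code, or -ECANCELED / -EBUSY on failure
 */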