block-backend.c

/*
 * QEMU Block backends
 *
 * Copyright (C) 2014-2016 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1
 * or later. See the COPYING.LIB file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "system/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/coroutines.h"
#include "block/throttle-groups.h"
#include "hw/qdev-core.h"
#include "system/blockdev.h"
#include "system/runstate.h"
#include "system/replay.h"
#include "qapi/error.h"
#include "qapi/qapi-events-block.h"
#include "qemu/id.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "trace.h"
#include "migration/misc.h"

/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

typedef struct BlockBackendAioNotifier {
    void (*attached_aio_context)(AioContext *new_context, void *opaque);
    void (*detach_aio_context)(void *opaque);
    void *opaque;
    QLIST_ENTRY(BlockBackendAioNotifier) list;
} BlockBackendAioNotifier;

struct BlockBackend {
    char *name;
    int refcnt;
    BdrvChild *root;
    AioContext *ctx; /* access with atomic operations only */
    DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
    BlockBackendPublic public;

    DeviceState *dev;           /* attached device model, if any */
    const BlockDevOps *dev_ops;
    void *dev_opaque;

    /* If the BDS tree is removed, some of its options are stored here (which
     * can be used to restore those options in the new BDS on insert) */
    BlockBackendRootState root_state;

    bool enable_write_cache;

    /* I/O stats (display with "info blockstats"). */
    BlockAcctStats stats;

    BlockdevOnError on_read_error, on_write_error;
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

    uint64_t perm;
    uint64_t shared_perm;
    bool disable_perm;

    bool allow_aio_context_change;
    bool allow_write_beyond_eof;

    /* Protected by BQL */
    NotifierList remove_bs_notifiers, insert_bs_notifiers;
    QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;

    int quiesce_counter; /* atomic: written under BQL, read by other threads */
    QemuMutex queued_requests_lock; /* protects queued_requests */
    CoQueue queued_requests;
    bool disable_request_queuing; /* atomic */

    VMChangeStateEntry *vmsh;
    bool force_allow_inactivate;

    /* Number of in-flight aio requests. BlockDriverState also counts
     * in-flight requests but aio requests can exist even when blk->root is
     * NULL, so we cannot rely on its counter for that case.
     * Accessed with atomic ops.
     */
    unsigned int in_flight;
};

typedef struct BlockBackendAIOCB {
    BlockAIOCB common;
    BlockBackend *blk;
    int ret;
} BlockBackendAIOCB;

static const AIOCBInfo block_backend_aiocb_info = {
    .aiocb_size = sizeof(BlockBackendAIOCB),
};

static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);

/* All BlockBackends. Protected by BQL. */
static QTAILQ_HEAD(, BlockBackend) block_backends =
    QTAILQ_HEAD_INITIALIZER(block_backends);

/*
 * All BlockBackends referenced by the monitor and which are iterated through by
 * blk_next(). Protected by BQL.
 */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);

static int coroutine_mixed_fn GRAPH_RDLOCK
blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
                    Error **errp);

static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
                                     int *child_flags, QDict *child_options,
                                     int parent_flags, QDict *parent_options)
{
    /* We're not supposed to call this function for root nodes */
    abort();
}

static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);

static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);

static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
                                    GHashTable *visited, Transaction *tran,
                                    Error **errp);

static char *blk_root_get_parent_desc(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    g_autofree char *dev_id = NULL;

    if (blk->name) {
        return g_strdup_printf("block device '%s'", blk->name);
    }

    dev_id = blk_get_attached_dev_id(blk);
    if (*dev_id) {
        return g_strdup_printf("block device '%s'", dev_id);
    } else {
        /* TODO Callback into the BB owner for something more detailed */
        return g_strdup("an unnamed block device");
    }
}

static const char *blk_root_get_name(BdrvChild *child)
{
    return blk_name(child->opaque);
}
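
/*
 * VM change state handler installed by blk_root_activate() while migration is
 * still in progress: once the VM leaves the migration state, apply the
 * deferred shared_perm update and drop the handler again.
 */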
static void blk_vm_state_changed(void *opaque, bool running, RunState state)
{
    Error *local_err = NULL;
    BlockBackend *blk = opaque;

    if (state == RUN_STATE_INMIGRATE) {
        return;
    }

    qemu_del_vm_change_state_handler(blk->vmsh);
    blk->vmsh = NULL;
    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
    if (local_err) {
        error_report_err(local_err);
    }
}

/*
 * Notifies the user of the BlockBackend that migration has completed. qdev
 * devices can tighten their permissions in response (specifically revoke
 * shared write permissions that we needed for storage migration).
 *
 * If an error is returned, the VM cannot be allowed to be resumed.
 */
static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp)
{
    BlockBackend *blk = child->opaque;
    Error *local_err = NULL;
    uint64_t saved_shared_perm;

    if (!blk->disable_perm) {
        return;
    }

    blk->disable_perm = false;

    /*
     * blk->shared_perm contains the permissions we want to share once
     * migration is really completely done. For now, we need to share
     * all; but we also need to retain blk->shared_perm, which is
     * overwritten by a successful blk_set_perm() call. Save it and
     * restore it below.
     */
    saved_shared_perm = blk->shared_perm;

    blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        blk->disable_perm = true;
        return;
    }
    blk->shared_perm = saved_shared_perm;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* Activation can happen when migration process is still active, for
         * example when nbd_server_add is called during non-shared storage
         * migration. Defer the shared_perm update to migration completion. */
        if (!blk->vmsh) {
            blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
                                                         blk);
        }
        return;
    }

    blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        blk->disable_perm = true;
        return;
    }
}

void blk_set_force_allow_inactivate(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    blk->force_allow_inactivate = true;
}

static bool blk_can_inactivate(BlockBackend *blk)
{
    /* If it is a guest device, inactivate is ok. */
    if (blk->dev || blk_name(blk)[0]) {
        return true;
    }

    /* Inactivating means no more writes to the image can be done,
     * even if those writes would be changes invisible to the
     * guest. For block job BBs that satisfy this, we can just allow
     * it. This is the case for mirror job source, which is required
     * by libvirt non-shared block migration. */
    if (!(blk->perm & ~BLK_PERM_CONSISTENT_READ)) {
        return true;
    }

    return blk->force_allow_inactivate;
}

static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->disable_perm) {
        return 0;
    }

    if (!blk_can_inactivate(blk)) {
        return -EPERM;
    }

    blk->disable_perm = true;
    if (blk->root) {
        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
    }

    return 0;
}

static void blk_root_attach(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    BlockBackendAioNotifier *notifier;

    trace_blk_root_attach(child, blk, child->bs);

    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
        bdrv_add_aio_context_notifier(child->bs,
                                      notifier->attached_aio_context,
                                      notifier->detach_aio_context,
                                      notifier->opaque);
    }
}

static void blk_root_detach(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    BlockBackendAioNotifier *notifier;

    trace_blk_root_detach(child, blk, child->bs);

    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
        bdrv_remove_aio_context_notifier(child->bs,
                                         notifier->attached_aio_context,
                                         notifier->detach_aio_context,
                                         notifier->opaque);
    }
}

static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
{
    BlockBackend *blk = c->opaque;
    IO_CODE();

    return blk_get_aio_context(blk);
}
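
/* Callbacks the block layer invokes on the root BdrvChild of a BlockBackend */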
static const BdrvChildClass child_root = {
    .inherit_options = blk_root_inherit_options,

    .change_media = blk_root_change_media,
    .resize = blk_root_resize,
    .get_name = blk_root_get_name,
    .get_parent_desc = blk_root_get_parent_desc,

    .drained_begin = blk_root_drained_begin,
    .drained_poll = blk_root_drained_poll,
    .drained_end = blk_root_drained_end,

    .activate = blk_root_activate,
    .inactivate = blk_root_inactivate,

    .attach = blk_root_attach,
    .detach = blk_root_detach,

    .change_aio_ctx = blk_root_change_aio_ctx,

    .get_parent_aio_context = blk_root_get_parent_aio_context,
};

/*
 * Create a new BlockBackend with a reference count of one.
 *
 * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
 * to request for a block driver node that is attached to this BlockBackend.
 * @shared_perm is a bitmask which describes which permissions may be granted
 * to other users of the attached node.
 * Both sets of permissions can be changed later using blk_set_perm().
 *
 * Return the new BlockBackend on success, null on failure.
 */
BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
{
    BlockBackend *blk;

    GLOBAL_STATE_CODE();

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
    blk->ctx = ctx;
    blk->perm = perm;
    blk->shared_perm = shared_perm;
    blk_set_enable_write_cache(blk, true);

    blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
    blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;

    block_acct_init(&blk->stats);

    qemu_mutex_init(&blk->queued_requests_lock);
    qemu_co_queue_init(&blk->queued_requests);
    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);
    QLIST_INIT(&blk->aio_notifiers);

    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
    return blk;
}

/*
 * Create a new BlockBackend connected to an existing BlockDriverState.
 *
 * @perm is a bitmask of BLK_PERM_* constants which describes the
 * permissions to request for @bs that is attached to this
 * BlockBackend. @shared_perm is a bitmask which describes which
 * permissions may be granted to other users of the attached node.
 * Both sets of permissions can be changed later using blk_set_perm().
 *
 * Return the new BlockBackend on success, null on failure.
 */
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
                              uint64_t shared_perm, Error **errp)
{
    BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);

    GLOBAL_STATE_CODE();

    if (blk_insert_bs(blk, bs, errp) < 0) {
        blk_unref(blk);
        return NULL;
    }
    return blk;
}

/*
 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
 * By default, the new BlockBackend is in the main AioContext, but if the
 * parameters connect it with any existing node in a different AioContext, it
 * may end up there instead.
 *
 * Just as with bdrv_open(), after having called this function the reference to
 * @options belongs to the block layer (even on failure).
 *
 * TODO: Remove @filename and @flags; it should be possible to specify a whole
 * BDS tree just by specifying the @options QDict (or @reference,
 * alternatively). At the time of adding this function, this is not possible,
 * though, so callers of this function have to be able to specify @filename and
 * @flags.
 */
BlockBackend *blk_new_open(const char *filename, const char *reference,
                           QDict *options, int flags, Error **errp)
{
    BlockBackend *blk;
    BlockDriverState *bs;
    uint64_t perm = 0;
    uint64_t shared = BLK_PERM_ALL;

    GLOBAL_STATE_CODE();

    /*
     * blk_new_open() is mainly used in .bdrv_create implementations and the
     * tools where sharing isn't a major concern because the BDS stays private
     * and the file is generally not supposed to be used by a second process,
     * so we just request permission according to the flags.
     *
     * The exceptions are xen_disk and blockdev_init(); in these cases, the
     * caller of blk_new_open() doesn't make use of the permissions, but they
     * shouldn't hurt either. We can still share everything here because the
     * guest devices will add their own blockers if they can't share.
     */
    if ((flags & BDRV_O_NO_IO) == 0) {
        perm |= BLK_PERM_CONSISTENT_READ;
        if (flags & BDRV_O_RDWR) {
            perm |= BLK_PERM_WRITE;
        }
    }
    if (flags & BDRV_O_RESIZE) {
        perm |= BLK_PERM_RESIZE;
    }
    if (flags & BDRV_O_NO_SHARE) {
        shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
    }

    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        return NULL;
    }

    /* bdrv_open() could have moved bs to a different AioContext */
    blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
    blk->perm = perm;
    blk->shared_perm = shared;

    blk_insert_bs(blk, bs, errp);
    bdrv_unref(bs);

    if (!blk->root) {
        blk_unref(blk);
        return NULL;
    }

    return blk;
}
static void blk_delete(BlockBackend *blk)
{
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
    if (blk->public.throttle_group_member.throttle_state) {
        blk_io_limits_disable(blk);
    }
    if (blk->root) {
        blk_remove_bs(blk);
    }
    if (blk->vmsh) {
        qemu_del_vm_change_state_handler(blk->vmsh);
        blk->vmsh = NULL;
    }
    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->aio_notifiers));
    assert(qemu_co_queue_empty(&blk->queued_requests));
    qemu_mutex_destroy(&blk->queued_requests_lock);
    QTAILQ_REMOVE(&block_backends, blk, link);
    drive_info_del(blk->legacy_dinfo);
    block_acct_cleanup(&blk->stats);
    g_free(blk);
}

static void drive_info_del(DriveInfo *dinfo)
{
    if (!dinfo) {
        return;
    }
    qemu_opts_del(dinfo->opts);
    g_free(dinfo);
}

int blk_get_refcnt(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk ? blk->refcnt : 0;
}

/*
 * Increment @blk's reference count.
 * @blk must not be null.
 */
void blk_ref(BlockBackend *blk)
{
    assert(blk->refcnt > 0);
    GLOBAL_STATE_CODE();
    blk->refcnt++;
}

/*
 * Decrement @blk's reference count.
 * If this drops it to zero, destroy @blk.
 * For convenience, do nothing if @blk is null.
 */
void blk_unref(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    if (blk) {
        assert(blk->refcnt > 0);
        if (blk->refcnt > 1) {
            blk->refcnt--;
        } else {
            blk_drain(blk);
            /* blk_drain() cannot resurrect blk, nobody held a reference */
            assert(blk->refcnt == 1);
            blk->refcnt = 0;
            blk_delete(blk);
        }
    }
}

/*
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
BlockBackend *blk_all_next(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
}

void blk_remove_all_bs(void)
{
    BlockBackend *blk = NULL;

    GLOBAL_STATE_CODE();

    while ((blk = blk_all_next(blk)) != NULL) {
        if (blk->root) {
            blk_remove_bs(blk);
        }
    }
}

/*
 * Return the monitor-owned BlockBackend after @blk.
 * If @blk is null, return the first one.
 * Else, return @blk's next sibling, which may be null.
 *
 * To iterate over all BlockBackends, do
 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 *     ...
 * }
 */
BlockBackend *blk_next(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk ? QTAILQ_NEXT(blk, monitor_link)
               : QTAILQ_FIRST(&monitor_block_backends);
}

/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
 * the monitor or attached to a BlockBackend */
BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
    BlockDriverState *bs, *old_bs;

    /* Must be called from the main loop */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());

    old_bs = it->bs;

    /* First, return all root nodes of BlockBackends. In order to avoid
     * returning a BDS twice when multiple BBs refer to it, we only return it
     * if the BB is the first one in the parent list of the BDS. */
    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
        BlockBackend *old_blk = it->blk;

        do {
            it->blk = blk_all_next(it->blk);
            bs = it->blk ? blk_bs(it->blk) : NULL;
        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));

        if (it->blk) {
            blk_ref(it->blk);
        }
        blk_unref(old_blk);

        if (bs) {
            bdrv_ref(bs);
            bdrv_unref(old_bs);
            it->bs = bs;
            return bs;
        }
        it->phase = BDRV_NEXT_MONITOR_OWNED;
    }

    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
     * BDSes that are attached to a BlockBackend here; they have been handled
     * by the above block already */
    do {
        it->bs = bdrv_next_monitor_owned(it->bs);
        bs = it->bs;
    } while (bs && bdrv_has_blk(bs));

    if (bs) {
        bdrv_ref(bs);
    }
    bdrv_unref(old_bs);

    return bs;
}

static void bdrv_next_reset(BdrvNextIterator *it)
{
    *it = (BdrvNextIterator) {
        .phase = BDRV_NEXT_BACKEND_ROOTS,
    };
}

BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
    GLOBAL_STATE_CODE();
    bdrv_next_reset(it);

    return bdrv_next(it);
}

/* Must be called when aborting a bdrv_next() iteration before
 * bdrv_next() returns NULL */
void bdrv_next_cleanup(BdrvNextIterator *it)
{
    /* Must be called from the main loop */
    assert(qemu_get_current_aio_context() == qemu_get_aio_context());

    bdrv_unref(it->bs);

    if (it->phase == BDRV_NEXT_BACKEND_ROOTS && it->blk) {
        blk_unref(it->blk);
    }

    bdrv_next_reset(it);
}
/*
 * Add a BlockBackend into the list of backends referenced by the monitor, with
 * the given @name acting as the handle for the monitor.
 * Strictly for use by blockdev.c.
 *
 * @name must not be null or empty.
 *
 * Returns true on success and false on failure. In the latter case, an Error
 * object is returned through @errp.
 */
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
    assert(!blk->name);
    assert(name && name[0]);
    GLOBAL_STATE_CODE();

    if (!id_wellformed(name)) {
        error_setg(errp, "Invalid device name");
        return false;
    }
    if (blk_by_name(name)) {
        error_setg(errp, "Device with id '%s' already exists", name);
        return false;
    }
    if (bdrv_find_node(name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   name);
        return false;
    }

    blk->name = g_strdup(name);
    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
    return true;
}

/*
 * Remove a BlockBackend from the list of backends referenced by the monitor.
 * Strictly for use by blockdev.c.
 */
void monitor_remove_blk(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();

    if (!blk->name) {
        return;
    }

    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
    g_free(blk->name);
    blk->name = NULL;
}

/*
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
const char *blk_name(const BlockBackend *blk)
{
    IO_CODE();
    return blk->name ?: "";
}

/*
 * Return the BlockBackend with name @name if it exists, else null.
 * @name must not be null.
 */
BlockBackend *blk_by_name(const char *name)
{
    BlockBackend *blk = NULL;

    GLOBAL_STATE_CODE();

    assert(name);
    while ((blk = blk_next(blk)) != NULL) {
        if (!strcmp(name, blk->name)) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Return the BlockDriverState attached to @blk if any, else null.
 */
BlockDriverState *blk_bs(BlockBackend *blk)
{
    IO_CODE();
    return blk->root ? blk->root->bs : NULL;
}

static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs)
{
    BdrvChild *child;

    GLOBAL_STATE_CODE();
    assert_bdrv_graph_readable();

    QLIST_FOREACH(child, &bs->parents, next_parent) {
        if (child->klass == &child_root) {
            return child->opaque;
        }
    }

    return NULL;
}

/*
 * Returns true if @bs has an associated BlockBackend.
 */
bool bdrv_has_blk(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();
    return bdrv_first_blk(bs) != NULL;
}

/*
 * Returns true if @bs has only BlockBackends as parents.
 */
bool bdrv_is_root_node(BlockDriverState *bs)
{
    BdrvChild *c;

    GLOBAL_STATE_CODE();
    assert_bdrv_graph_readable();

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->klass != &child_root) {
            return false;
        }
    }

    return true;
}

/*
 * Return @blk's DriveInfo if any, else null.
 */
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->legacy_dinfo;
}

/*
 * Set @blk's DriveInfo to @dinfo, and return it.
 * @blk must not have a DriveInfo set already.
 * No other BlockBackend may have the same DriveInfo set.
 */
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
    assert(!blk->legacy_dinfo);
    GLOBAL_STATE_CODE();
    return blk->legacy_dinfo = dinfo;
}

/*
 * Return the BlockBackend with DriveInfo @dinfo.
 * It must exist.
 */
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
    BlockBackend *blk = NULL;

    GLOBAL_STATE_CODE();

    while ((blk = blk_next(blk)) != NULL) {
        if (blk->legacy_dinfo == dinfo) {
            return blk;
        }
    }
    abort();
}

/*
 * Returns a pointer to the publicly accessible fields of @blk.
 */
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return &blk->public;
}

/*
 * Disassociates the currently associated BlockDriverState from @blk.
 */
void blk_remove_bs(BlockBackend *blk)
{
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    BdrvChild *root;

    GLOBAL_STATE_CODE();

    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    if (tgm->throttle_state) {
        BlockDriverState *bs = blk_bs(blk);

        /*
         * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
         * example, if a temporary filter node is removed by a blockjob.
         */
        bdrv_ref(bs);
        bdrv_drained_begin(bs);
        throttle_group_detach_aio_context(tgm);
        throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
        bdrv_drained_end(bs);
        bdrv_unref(bs);
    }

    blk_update_root_state(blk);

    /* bdrv_root_unref_child() will cause blk->root to become stale and may
     * switch to a completion coroutine later on. Let's drain all I/O here
     * to avoid that and a potential QEMU crash.
     */
    blk_drain(blk);
    root = blk->root;
    blk->root = NULL;

    bdrv_graph_wrlock();
    bdrv_root_unref_child(root);
    bdrv_graph_wrunlock();
}

/*
 * Associates a new BlockDriverState with @blk.
 */
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    uint64_t perm, shared_perm;

    GLOBAL_STATE_CODE();
    bdrv_ref(bs);
    bdrv_graph_wrlock();

    if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) {
        blk->disable_perm = true;
        perm = 0;
        shared_perm = BLK_PERM_ALL;
    } else {
        perm = blk->perm;
        shared_perm = blk->shared_perm;
    }

    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                                       perm, shared_perm, blk, errp);
    bdrv_graph_wrunlock();
    if (blk->root == NULL) {
        return -EPERM;
    }

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (tgm->throttle_state) {
        throttle_group_detach_aio_context(tgm);
        throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
    }

    return 0;
}
/*
 * Change BlockDriverState associated with @blk.
 */
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
{
    GLOBAL_STATE_CODE();
    return bdrv_replace_child_bs(blk->root, new_bs, errp);
}

/*
 * Sets the permission bitmasks that the user of the BlockBackend needs.
 */
static int coroutine_mixed_fn GRAPH_RDLOCK
blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
                    Error **errp)
{
    int ret;

    GLOBAL_STATE_CODE();

    if (blk->root && !blk->disable_perm) {
        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
        if (ret < 0) {
            return ret;
        }
    }

    blk->perm = perm;
    blk->shared_perm = shared_perm;

    return 0;
}

int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
                 Error **errp)
{
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    return blk_set_perm_locked(blk, perm, shared_perm, errp);
}

void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
    GLOBAL_STATE_CODE();
    *perm = blk->perm;
    *shared_perm = blk->shared_perm;
}

/*
 * Attach device model @dev to @blk.
 * Return 0 on success, -EBUSY when a device model is attached already.
 */
int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
{
    GLOBAL_STATE_CODE();
    if (blk->dev) {
        return -EBUSY;
    }

    /* While migration is still incoming, we don't need to apply the
     * permissions of guest device BlockBackends. We might still have a block
     * job or NBD server writing to the image for storage migration. */
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        blk->disable_perm = true;
    }

    blk_ref(blk);
    blk->dev = dev;
    blk_iostatus_reset(blk);

    return 0;
}

/*
 * Detach device model @dev from @blk.
 * @dev must be currently attached to @blk.
 */
void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
{
    assert(blk->dev == dev);
    GLOBAL_STATE_CODE();
    blk->dev = NULL;
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
    blk_unref(blk);
}

/*
 * Return the device model attached to @blk if any, else null.
 */
DeviceState *blk_get_attached_dev(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->dev;
}

/*
 * The caller is responsible for releasing the value returned
 * with g_free() after use.
 */
static char *blk_get_attached_dev_id_or_path(BlockBackend *blk, bool want_id)
{
    DeviceState *dev = blk->dev;
    IO_CODE();

    if (!dev) {
        return g_strdup("");
    } else if (want_id && dev->id) {
        return g_strdup(dev->id);
    }

    return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
}

char *blk_get_attached_dev_id(BlockBackend *blk)
{
    return blk_get_attached_dev_id_or_path(blk, true);
}

/*
 * The caller is responsible for releasing the value returned
 * with g_free() after use.
 */
static char *blk_get_attached_dev_path(BlockBackend *blk)
{
    return blk_get_attached_dev_id_or_path(blk, false);
}

/*
 * Return the BlockBackend which has the device model @dev attached if it
 * exists, else null.
 *
 * @dev must not be null.
 */
BlockBackend *blk_by_dev(void *dev)
{
    BlockBackend *blk = NULL;

    GLOBAL_STATE_CODE();

    assert(dev != NULL);
    while ((blk = blk_all_next(blk)) != NULL) {
        if (blk->dev == dev) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Set @blk's device model callbacks to @ops.
 * @opaque is the opaque argument to pass to the callbacks.
 * This is for use by device models.
 */
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
{
    GLOBAL_STATE_CODE();
    blk->dev_ops = ops;
    blk->dev_opaque = opaque;

    /* Are we currently quiesced? Should we enforce this right now? */
    if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
        ops->drained_begin(opaque);
    }
}

/*
 * Notify @blk's attached device model of media change.
 *
 * If @load is true, notify of media load. This action can fail, meaning that
 * the medium cannot be loaded. @errp is set then.
 *
 * If @load is false, notify of media eject. This can never fail.
 *
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
    GLOBAL_STATE_CODE();
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;
        Error *local_err = NULL;

        tray_was_open = blk_dev_is_tray_open(blk);
        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
        if (local_err) {
            assert(load == true);
            error_propagate(errp, local_err);
            return;
        }
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
            char *id = blk_get_attached_dev_id(blk);
            qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
            g_free(id);
        }
    }
}

static void blk_root_change_media(BdrvChild *child, bool load)
{
    blk_dev_change_media_cb(child->opaque, load, NULL);
}

/*
 * Does @blk's attached device model have removable media?
 * %true if no device model is attached.
 */
bool blk_dev_has_removable_media(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}

/*
 * Does @blk's attached device model have a tray?
 */
bool blk_dev_has_tray(BlockBackend *blk)
{
    IO_CODE();
    return blk->dev_ops && blk->dev_ops->is_tray_open;
}

/*
 * Notify @blk's attached device model of a media eject request.
 * If @force is true, the medium is about to be yanked out forcefully.
 */
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
    GLOBAL_STATE_CODE();
    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
    }
}

/*
 * Does @blk's attached device model have a tray, and is it open?
 */
bool blk_dev_is_tray_open(BlockBackend *blk)
{
    IO_CODE();
    if (blk_dev_has_tray(blk)) {
        return blk->dev_ops->is_tray_open(blk->dev_opaque);
    }
    return false;
}

/*
 * Does @blk's attached device model have the medium locked?
 * %false if the device model has no such lock.
 */
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
    }
    return false;
}

/*
 * Notify @blk's attached device model of a backend size change.
 */
static void blk_root_resize(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->dev_ops && blk->dev_ops->resize_cb) {
        blk->dev_ops->resize_cb(blk->dev_opaque);
    }
}

void blk_iostatus_enable(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    blk->iostatus_enabled = true;
    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
    IO_CODE();
    return (blk->iostatus_enabled &&
            (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
             blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
             blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->iostatus;
}

void blk_iostatus_reset(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    if (blk_iostatus_is_enabled(blk)) {
        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}

void blk_iostatus_set_err(BlockBackend *blk, int error)
{
    IO_CODE();
    assert(blk_iostatus_is_enabled(blk));
    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
    IO_CODE();
    blk->allow_write_beyond_eof = allow;
}

void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
{
    IO_CODE();
    blk->allow_aio_context_change = allow;
}

void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
    IO_CODE();
    qatomic_set(&blk->disable_request_queuing, disable);
}
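
/*
 * Check that an I/O request of @bytes at @offset fits within the medium
 * currently inserted into @blk (unless writes beyond EOF are explicitly
 * allowed). Returns 0 if the request is within bounds, a negative errno
 * otherwise.
 */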
static int coroutine_fn GRAPH_RDLOCK
blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
{
    int64_t len;

    if (bytes < 0) {
        return -EIO;
    }

    if (!blk_co_is_available(blk)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    if (!blk->allow_write_beyond_eof) {
        len = bdrv_co_getlength(blk_bs(blk));
        if (len < 0) {
            return len;
        }

        if (offset > len || len - offset < bytes) {
            return -EIO;
        }
    }

    return 0;
}

/* Are we currently in a drained section? */
bool blk_in_drain(BlockBackend *blk)
{
    GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
    return qatomic_read(&blk->quiesce_counter);
}

/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
{
    assert(blk->in_flight > 0);

    if (qatomic_read(&blk->quiesce_counter) &&
        !qatomic_read(&blk->disable_request_queuing)) {
        /*
         * Take lock before decrementing in flight counter so main loop thread
         * waits for us to enqueue ourselves before it can leave the drained
         * section.
         */
        qemu_mutex_lock(&blk->queued_requests_lock);
        blk_dec_in_flight(blk);
        qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
        blk_inc_in_flight(blk);
        qemu_mutex_unlock(&blk->queued_requests_lock);
    }
}

/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_co_do_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes,
                      QEMUIOVector *qiov, size_t qiov_offset,
                      BdrvRequestFlags flags)
{
    int ret;
    BlockDriverState *bs;
    IO_CODE();

    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();

    /* Call blk_bs() only after waiting, the graph may have changed */
    bs = blk_bs(blk);
    trace_blk_co_preadv(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
    if (blk->public.throttle_group_member.throttle_state) {
        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
                                              bytes, THROTTLE_READ);
    }

    ret = bdrv_co_preadv_part(blk->root, offset, bytes, qiov, qiov_offset,
                              flags);
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes,
                              void *buf, BdrvRequestFlags flags)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    IO_OR_GS_CODE();

    assert(bytes <= SIZE_MAX);

    return blk_co_preadv(blk, offset, bytes, &qiov, flags);
}

int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               int64_t bytes, QEMUIOVector *qiov,
                               BdrvRequestFlags flags)
{
    int ret;
    IO_OR_GS_CODE();

    blk_inc_in_flight(blk);
    ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, 0, flags);
    blk_dec_in_flight(blk);

    return ret;
}

int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset,
                                    int64_t bytes, QEMUIOVector *qiov,
                                    size_t qiov_offset, BdrvRequestFlags flags)
{
    int ret;
    IO_OR_GS_CODE();

    blk_inc_in_flight(blk);
    ret = blk_co_do_preadv_part(blk, offset, bytes, qiov, qiov_offset, flags);
    blk_dec_in_flight(blk);

    return ret;
}

/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
                       QEMUIOVector *qiov, size_t qiov_offset,
                       BdrvRequestFlags flags)
{
    int ret;
    BlockDriverState *bs;
    IO_CODE();

    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();

    /* Call blk_bs() only after waiting, the graph may have changed */
    bs = blk_bs(blk);
    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
    if (blk->public.throttle_group_member.throttle_state) {
        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
                                              bytes, THROTTLE_WRITE);
    }

    if (!blk->enable_write_cache) {
        flags |= BDRV_REQ_FUA;
    }

    ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
                               flags);
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
                                     int64_t bytes,
                                     QEMUIOVector *qiov, size_t qiov_offset,
                                     BdrvRequestFlags flags)
{
    int ret;
    IO_OR_GS_CODE();

    blk_inc_in_flight(blk);
    ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
    blk_dec_in_flight(blk);

    return ret;
}

int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes,
                               const void *buf, BdrvRequestFlags flags)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    IO_OR_GS_CODE();

    assert(bytes <= SIZE_MAX);

    return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
}

int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                int64_t bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
{
    IO_OR_GS_CODE();
    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
}

int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
                                           BlockDriverState *base,
                                           int64_t offset, int64_t bytes,
                                           int64_t *pnum, int64_t *map,
                                           BlockDriverState **file)
{
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
                                      map, file);
}

int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
                                           BlockDriverState *base,
                                           bool include_base, int64_t offset,
                                           int64_t bytes, int64_t *pnum)
{
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
                                      bytes, pnum);
}
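
/* Parameters of one request, handed to the AIO emulation coroutine entries */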
typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
    void *iobuf;
    int ret;
    BdrvRequestFlags flags;
} BlkRwCo;

int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
    GLOBAL_STATE_CODE();
    return bdrv_make_zero(blk->root, flags);
}

void blk_inc_in_flight(BlockBackend *blk)
{
    IO_CODE();
    qatomic_inc(&blk->in_flight);
}

void blk_dec_in_flight(BlockBackend *blk)
{
    IO_CODE();
    qatomic_dec(&blk->in_flight);
    aio_wait_kick();
}

static void error_callback_bh(void *opaque)
{
    struct BlockBackendAIOCB *acb = opaque;

    blk_dec_in_flight(acb->blk);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
}
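
/*
 * Schedule a bottom half that invokes @cb with the fixed return value @ret,
 * without issuing any actual I/O, so that early failures can be reported
 * through the regular AIO completion path.
 */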
BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                  BlockCompletionFunc *cb,
                                  void *opaque, int ret)
{
    struct BlockBackendAIOCB *acb;
    IO_CODE();

    blk_inc_in_flight(blk);
    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;

    replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                     error_callback_bh, acb);
    return &acb->common;
}
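
/* AIOCB used to emulate AIO on top of the coroutine-based request functions */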
typedef struct BlkAioEmAIOCB {
    BlockAIOCB common;
    BlkRwCo rwco;
    int64_t bytes;
    bool has_returned;
} BlkAioEmAIOCB;

static const AIOCBInfo blk_aio_em_aiocb_info = {
    .aiocb_size = sizeof(BlkAioEmAIOCB),
};

static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
    if (acb->has_returned) {
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        blk_dec_in_flight(acb->rwco.blk);
        qemu_aio_unref(acb);
    }
}

static void blk_aio_complete_bh(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    assert(acb->has_returned);
    blk_aio_complete(acb);
}
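
/*
 * Common helper behind the blk_aio_*() functions: set up an AIOCB, run
 * @co_entry in a coroutine and, if that coroutine has already finished by the
 * time it returns control here, defer the completion callback to a bottom
 * half so it is never invoked before this function has returned.
 */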
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
                                int64_t bytes,
                                void *iobuf, CoroutineEntry co_entry,
                                BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;

    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .iobuf  = iobuf,
        .flags  = flags,
        .ret    = NOT_DONE,
    };
    acb->bytes = bytes;
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
    aio_co_enter(qemu_get_current_aio_context(), co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                         blk_aio_complete_bh, acb);
    }

    return &acb->common;
}
  1364. static void coroutine_fn blk_aio_read_entry(void *opaque)
  1365. {
  1366. BlkAioEmAIOCB *acb = opaque;
  1367. BlkRwCo *rwco = &acb->rwco;
  1368. QEMUIOVector *qiov = rwco->iobuf;
  1369. assert(qiov->size == acb->bytes);
  1370. rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov,
  1371. 0, rwco->flags);
  1372. blk_aio_complete(acb);
  1373. }
  1374. static void coroutine_fn blk_aio_write_entry(void *opaque)
  1375. {
  1376. BlkAioEmAIOCB *acb = opaque;
  1377. BlkRwCo *rwco = &acb->rwco;
  1378. QEMUIOVector *qiov = rwco->iobuf;
  1379. assert(!qiov || qiov->size == acb->bytes);
  1380. rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
  1381. qiov, 0, rwco->flags);
  1382. blk_aio_complete(acb);
  1383. }
  1384. BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
  1385. int64_t bytes, BdrvRequestFlags flags,
  1386. BlockCompletionFunc *cb, void *opaque)
  1387. {
  1388. IO_CODE();
  1389. return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
  1390. flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
  1391. }
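
/* Length of the attached medium in bytes, or -ENOMEDIUM if none is inserted. */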
int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
{
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    if (!blk_co_is_available(blk)) {
        return -ENOMEDIUM;
    }
    return bdrv_co_getlength(blk_bs(blk));
}

int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    if (!bs) {
        return -ENOMEDIUM;
    } else {
        return bdrv_co_nb_sectors(bs);
    }
}

/*
 * This wrapper is written by hand because this function is in the hot I/O path,
 * via blk_get_geometry.
 */
int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    if (!bs) {
        return -ENOMEDIUM;
    } else {
        return bdrv_nb_sectors(bs);
    }
}

/* return 0 as number of sectors if no device present or error */
void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
                                      uint64_t *nb_sectors_ptr)
{
    int64_t ret = blk_co_nb_sectors(blk);
    *nb_sectors_ptr = ret < 0 ? 0 : ret;
}

/*
 * This wrapper is written by hand because this function is in the hot I/O path.
 */
void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk,
                                         uint64_t *nb_sectors_ptr)
{
    int64_t ret = blk_nb_sectors(blk);
    *nb_sectors_ptr = ret < 0 ? 0 : ret;
}
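
/* AIO read/write wrappers; the request length is taken from @qiov->size. */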
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                           QEMUIOVector *qiov, BdrvRequestFlags flags,
                           BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    assert((uint64_t)qiov->size <= INT64_MAX);
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_read_entry, flags, cb, opaque);
}

BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    assert((uint64_t)qiov->size <= INT64_MAX);
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_write_entry, flags, cb, opaque);
}

void blk_aio_cancel(BlockAIOCB *acb)
{
    GLOBAL_STATE_CODE();
    bdrv_aio_cancel(acb);
}

void blk_aio_cancel_async(BlockAIOCB *acb)
{
    IO_CODE();
    bdrv_aio_cancel_async(acb);
}

/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    IO_CODE();
    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();
    if (!blk_co_is_available(blk)) {
        return -ENOMEDIUM;
    }
    return bdrv_co_ioctl(blk_bs(blk), req, buf);
}

int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req,
                              void *buf)
{
    int ret;
    IO_OR_GS_CODE();
    blk_inc_in_flight(blk);
    ret = blk_co_do_ioctl(blk, req, buf);
    blk_dec_in_flight(blk);
    return ret;
}

static void coroutine_fn blk_aio_ioctl_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}

/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn
blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
{
    int ret;
    IO_CODE();
    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();
    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }
    return bdrv_co_pdiscard(blk->root, offset, bytes);
}

static void coroutine_fn blk_aio_pdiscard_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int64_t bytes,
                             BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
}

int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
                                 int64_t bytes)
{
    int ret;
    IO_OR_GS_CODE();
    blk_inc_in_flight(blk);
    ret = blk_co_do_pdiscard(blk, offset, bytes);
    blk_dec_in_flight(blk);
    return ret;
}

/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
    IO_CODE();
    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();
    if (!blk_co_is_available(blk)) {
        return -ENOMEDIUM;
    }
    return bdrv_co_flush(blk_bs(blk));
}

static void coroutine_fn blk_aio_flush_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_do_flush(rwco->blk);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}

int coroutine_fn blk_co_flush(BlockBackend *blk)
{
    int ret;
    IO_OR_GS_CODE();
    blk_inc_in_flight(blk);
    ret = blk_co_do_flush(blk);
    blk_dec_in_flight(blk);
    return ret;
}
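
/*
 * Zone AIO wrappers.  These follow the blk_aio_prwv() pattern but smuggle the
 * extra argument (the nr_zones pointer, the zone op, or the in/out offset
 * pointer) through acb->bytes / rwco.iobuf and cast it back in the coroutine
 * entry point.
 */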
static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
                                   (unsigned int*)(uintptr_t)acb->bytes,
                                   rwco->iobuf);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
                                unsigned int *nr_zones,
                                BlockZoneDescriptor *zones,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    IO_CODE();
    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk = blk,
        .offset = offset,
        .iobuf = zones,
        .ret = NOT_DONE,
    };
    acb->bytes = (int64_t)(uintptr_t)nr_zones;
    acb->has_returned = false;
    co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
    aio_co_enter(qemu_get_current_aio_context(), co);
    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                         blk_aio_complete_bh, acb);
    }
    return &acb->common;
}

static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_zone_mgmt(rwco->blk,
                                 (BlockZoneOp)(uintptr_t)rwco->iobuf,
                                 rwco->offset, acb->bytes);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
                              int64_t offset, int64_t len,
                              BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    IO_CODE();
    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk = blk,
        .offset = offset,
        .iobuf = (void *)(uintptr_t)op,
        .ret = NOT_DONE,
    };
    acb->bytes = len;
    acb->has_returned = false;
    co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
    aio_co_enter(qemu_get_current_aio_context(), co);
    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                         blk_aio_complete_bh, acb);
    }
    return &acb->common;
}

static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;
    rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
                                   rwco->iobuf, rwco->flags);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
                                QEMUIOVector *qiov, BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    IO_CODE();
    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk = blk,
        .ret = NOT_DONE,
        .flags = flags,
        .iobuf = qiov,
    };
    acb->bytes = (int64_t)(uintptr_t)offset;
    acb->has_returned = false;
    co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
    aio_co_enter(qemu_get_current_aio_context(), co);
    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                         blk_aio_complete_bh, acb);
    }
    return &acb->common;
}

/*
 * Send a zone_report command.
 * offset is a byte offset from the start of the device. No alignment
 * required for offset.
 * nr_zones represents IN maximum and OUT actual.
 */
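/*
 * Typical use from coroutine context (a minimal sketch; the zone count and
 * starting offset below are illustrative, not taken from this file):
 *
 *     unsigned int nr_zones = 16;
 *     g_autofree BlockZoneDescriptor *zones =
 *         g_new(BlockZoneDescriptor, nr_zones);
 *     int ret = blk_co_zone_report(blk, 0, &nr_zones, zones);
 *     // on success, nr_zones holds the number of descriptors filled in
 */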
int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
                                    unsigned int *nr_zones,
                                    BlockZoneDescriptor *zones)
{
    int ret;
    IO_CODE();
    blk_inc_in_flight(blk); /* increase before waiting */
    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();
    if (!blk_is_available(blk)) {
        blk_dec_in_flight(blk);
        return -ENOMEDIUM;
    }
    ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
    blk_dec_in_flight(blk);
    return ret;
}

/*
 * Send a zone_management command.
 * op is the zone operation;
 * offset is the byte offset from the start of the zoned device;
 * len is the maximum number of bytes the command should operate on. It
 * should be aligned with the device zone size.
 */
int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
                                  int64_t offset, int64_t len)
{
    int ret;
    IO_CODE();
    blk_inc_in_flight(blk);
    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();
    ret = blk_check_byte_request(blk, offset, len);
    if (ret < 0) {
        blk_dec_in_flight(blk);
        return ret;
    }
    ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
    blk_dec_in_flight(blk);
    return ret;
}

/*
 * Send a zone_append command.
 */
int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
                                    QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    int ret;
    IO_CODE();
    blk_inc_in_flight(blk);
    blk_wait_while_drained(blk);
    GRAPH_RDLOCK_GUARD();
    if (!blk_is_available(blk)) {
        blk_dec_in_flight(blk);
        return -ENOMEDIUM;
    }
    ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
    blk_dec_in_flight(blk);
    return ret;
}
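
/*
 * Quiesce the backend: drain the attached node (if any), then wait for the
 * in-flight counter to reach zero.  The explicit wait also covers -ENOMEDIUM
 * completions that never touch a BlockDriverState.
 */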
void blk_drain(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (bs) {
        bdrv_ref(bs);
        bdrv_drained_begin(bs);
    }
    /* We may have -ENOMEDIUM completions in flight */
    AIO_WAIT_WHILE(blk_get_aio_context(blk),
                   qatomic_read(&blk->in_flight) > 0);
    if (bs) {
        bdrv_drained_end(bs);
        bdrv_unref(bs);
    }
}

void blk_drain_all(void)
{
    BlockBackend *blk = NULL;
    GLOBAL_STATE_CODE();
    bdrv_drain_all_begin();
    while ((blk = blk_all_next(blk)) != NULL) {
        /* We may have -ENOMEDIUM completions in flight */
        AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
    }
    bdrv_drain_all_end();
}

void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                      BlockdevOnError on_write_error)
{
    GLOBAL_STATE_CODE();
    blk->on_read_error = on_read_error;
    blk->on_write_error = on_write_error;
}

BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
    IO_CODE();
    return is_read ? blk->on_read_error : blk->on_write_error;
}
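
/*
 * Map the configured error policy and the errno of a failed request to the
 * action the caller should take.  BLOCKDEV_ON_ERROR_AUTO is expected to have
 * been resolved before this point and is treated as a programming error.
 */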
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                      int error)
{
    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
    IO_CODE();
    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        return (error == ENOSPC) ?
               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BLOCK_ERROR_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BLOCK_ERROR_ACTION_IGNORE;
    case BLOCKDEV_ON_ERROR_AUTO:
    default:
        abort();
    }
}

static void send_qmp_error_event(BlockBackend *blk,
                                 BlockErrorAction action,
                                 bool is_read, int error)
{
    IoOperationType optype;
    BlockDriverState *bs = blk_bs(blk);
    g_autofree char *path = blk_get_attached_dev_path(blk);
    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
    qapi_event_send_block_io_error(path, blk_name(blk),
                                   bs ? bdrv_get_node_name(bs) : NULL, optype,
                                   action, blk_iostatus_is_enabled(blk),
                                   error == ENOSPC, strerror(error));
}

/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                      bool is_read, int error)
{
    assert(error >= 0);
    IO_CODE();
    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        blk_iostatus_set_err(blk, error);
        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects. First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event. Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop. In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(blk, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(blk, action, is_read, error);
    }
}

/*
 * Returns true if the BlockBackend can support taking write permissions
 * (because its root node is not read-only).
 */
bool blk_supports_write_perm(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (bs) {
        return !bdrv_is_read_only(bs);
    } else {
        return blk->root_state.open_flags & BDRV_O_RDWR;
    }
}

/*
 * Returns true if the BlockBackend can be written to in its current
 * configuration (i.e. if write permission has been requested).
 */
bool blk_is_writable(BlockBackend *blk)
{
    IO_CODE();
    return blk->perm & BLK_PERM_WRITE;
}

bool blk_is_sg(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (!bs) {
        return false;
    }
    return bdrv_is_sg(bs);
}

bool blk_enable_write_cache(BlockBackend *blk)
{
    IO_CODE();
    return blk->enable_write_cache;
}

void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    IO_CODE();
    blk->enable_write_cache = wce;
}

bool coroutine_fn blk_co_is_inserted(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    assert_bdrv_graph_readable();
    return bs && bdrv_co_is_inserted(bs);
}

bool coroutine_fn blk_co_is_available(BlockBackend *blk)
{
    IO_CODE();
    return blk_co_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}

void coroutine_fn blk_co_lock_medium(BlockBackend *blk, bool locked)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    if (bs) {
        bdrv_co_lock_medium(bs, locked);
    }
}

void coroutine_fn blk_co_eject(BlockBackend *blk, bool eject_flag)
{
    BlockDriverState *bs = blk_bs(blk);
    char *id;
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    if (bs) {
        bdrv_co_eject(bs, eject_flag);
    }
    /* Whether or not we ejected on the backend,
     * the frontend experienced a tray event. */
    id = blk_get_attached_dev_id(blk);
    qapi_event_send_device_tray_moved(blk_name(blk), id,
                                      eject_flag);
    g_free(id);
}

int blk_get_flags(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (bs) {
        return bdrv_get_flags(bs);
    } else {
        return blk->root_state.open_flags;
    }
}

/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    IO_CODE();
    return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}

/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint64_t max = INT_MAX;
    IO_CODE();
    if (bs) {
        max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
    }
    return ROUND_DOWN(max, blk_get_request_alignment(blk));
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint32_t max = INT_MAX;
    IO_CODE();
    if (bs) {
        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
    }
    return ROUND_DOWN(max, blk_get_request_alignment(blk));
}

int blk_get_max_hw_iov(BlockBackend *blk)
{
    IO_CODE();
    return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
                        blk->root->bs->bl.max_iov);
}

int blk_get_max_iov(BlockBackend *blk)
{
    IO_CODE();
    return blk->root->bs->bl.max_iov;
}

void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
    IO_CODE();
    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}

void *blk_blockalign(BlockBackend *blk, size_t size)
{
    IO_CODE();
    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}

bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();
    if (!bs) {
        return false;
    }
    return bdrv_op_is_blocked(bs, op, errp);
}

/**
 * Return BB's current AioContext. Note that this context may change
 * concurrently at any time, with one exception: If the BB has a root node
 * attached, its context will only change through bdrv_try_change_aio_context(),
 * which creates a drained section. Therefore, incrementing such a BB's
 * in-flight counter will prevent its context from changing.
 */
AioContext *blk_get_aio_context(BlockBackend *blk)
{
    IO_CODE();
    if (!blk) {
        return qemu_get_aio_context();
    }
    return qatomic_read(&blk->ctx);
}
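
/*
 * Move the BlockBackend (and its root node, if any) to @new_context.  The
 * change is temporarily allowed even for backends that normally forbid it,
 * since it is an explicit request by the backend's owner.
 */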
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                        Error **errp)
{
    bool old_allow_change;
    BlockDriverState *bs = blk_bs(blk);
    int ret;
    GLOBAL_STATE_CODE();
    if (!bs) {
        qatomic_set(&blk->ctx, new_context);
        return 0;
    }
    bdrv_ref(bs);
    old_allow_change = blk->allow_aio_context_change;
    blk->allow_aio_context_change = true;
    ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
    blk->allow_aio_context_change = old_allow_change;
    bdrv_unref(bs);
    return ret;
}
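
/*
 * Transaction that commits the AioContext switch for the root child: blk->ctx
 * is only updated once the whole graph change succeeds, and the throttle
 * group member follows the BlockBackend to the new context at that point.
 */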
typedef struct BdrvStateBlkRootContext {
    AioContext *new_ctx;
    BlockBackend *blk;
} BdrvStateBlkRootContext;

static void blk_root_set_aio_ctx_commit(void *opaque)
{
    BdrvStateBlkRootContext *s = opaque;
    BlockBackend *blk = s->blk;
    AioContext *new_context = s->new_ctx;
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    qatomic_set(&blk->ctx, new_context);
    if (tgm->throttle_state) {
        throttle_group_detach_aio_context(tgm);
        throttle_group_attach_aio_context(tgm, new_context);
    }
}

static TransactionActionDrv set_blk_root_context = {
    .commit = blk_root_set_aio_ctx_commit,
    .clean = g_free,
};

static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
                                    GHashTable *visited, Transaction *tran,
                                    Error **errp)
{
    BlockBackend *blk = child->opaque;
    BdrvStateBlkRootContext *s;
    if (!blk->allow_aio_context_change) {
        /*
         * Manually created BlockBackends (those with a name) that are not
         * attached to anything can change their AioContext without updating
         * their user; return an error for others.
         */
        if (!blk->name || blk->dev) {
            /* TODO Add BB name/QOM path */
            error_setg(errp, "Cannot change iothread of active block backend");
            return false;
        }
    }
    s = g_new(BdrvStateBlkRootContext, 1);
    *s = (BdrvStateBlkRootContext) {
        .new_ctx = ctx,
        .blk = blk,
    };
    tran_add(tran, &set_blk_root_context, s);
    return true;
}

void blk_add_aio_context_notifier(BlockBackend *blk,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BlockBackendAioNotifier *notifier;
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    notifier = g_new(BlockBackendAioNotifier, 1);
    notifier->attached_aio_context = attached_aio_context;
    notifier->detach_aio_context = detach_aio_context;
    notifier->opaque = opaque;
    QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
    if (bs) {
        bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                      detach_aio_context, opaque);
    }
}

void blk_remove_aio_context_notifier(BlockBackend *blk,
                                     void (*attached_aio_context)(AioContext *,
                                                                  void *),
                                     void (*detach_aio_context)(void *),
                                     void *opaque)
{
    BlockBackendAioNotifier *notifier;
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (bs) {
        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                         detach_aio_context, opaque);
    }
    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
        if (notifier->attached_aio_context == attached_aio_context &&
            notifier->detach_aio_context == detach_aio_context &&
            notifier->opaque == opaque) {
            QLIST_REMOVE(notifier, list);
            g_free(notifier);
            return;
        }
    }
    abort();
}

void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    GLOBAL_STATE_CODE();
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}

BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    IO_CODE();
    return &blk->stats;
}

void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                  BlockCompletionFunc *cb, void *opaque)
{
    IO_CODE();
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}

int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                      int64_t bytes, BdrvRequestFlags flags)
{
    IO_OR_GS_CODE();
    return blk_co_pwritev(blk, offset, bytes, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
}

int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset,
                                          int64_t bytes, const void *buf)
{
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    IO_OR_GS_CODE();
    return blk_co_pwritev_part(blk, offset, bytes, &qiov, 0,
                               BDRV_REQ_WRITE_COMPRESSED);
}

int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact,
                                 PreallocMode prealloc, BdrvRequestFlags flags,
                                 Error **errp)
{
    IO_OR_GS_CODE();
    GRAPH_RDLOCK_GUARD();
    if (!blk_co_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    return bdrv_co_truncate(blk->root, offset, exact, prealloc, flags, errp);
}

int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                     int64_t pos, int size)
{
    int ret;
    GLOBAL_STATE_CODE();
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }
    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
    if (ret < 0) {
        return ret;
    }
    if (ret == size && !blk->enable_write_cache) {
        ret = bdrv_flush(blk_bs(blk));
    }
    return ret < 0 ? ret : size;
}

int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
    GLOBAL_STATE_CODE();
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }
    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}

int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }
    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}

int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
    GLOBAL_STATE_CODE();
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }
    return bdrv_probe_geometry(blk_bs(blk), geo);
}

/*
 * Updates the BlockBackendRootState object with data from the currently
 * attached BlockDriverState.
 */
void blk_update_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    assert(blk->root);
    blk->root_state.open_flags = blk->root->bs->open_flags;
    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}

/*
 * Returns the detect-zeroes setting to be used for bdrv_open() of a
 * BlockDriverState which is supposed to inherit the root state.
 */
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->root_state.detect_zeroes;
}

/*
 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
 * supposed to inherit the root state.
 */
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->root_state.open_flags;
}

BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return &blk->root_state;
}

int blk_commit_all(void)
{
    BlockBackend *blk = NULL;
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();
    while ((blk = blk_all_next(blk)) != NULL) {
        BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
        if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
            int ret;
            ret = bdrv_commit(unfiltered_bs);
            if (ret < 0) {
                return ret;
            }
        }
    }
    return 0;
}

/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    GLOBAL_STATE_CODE();
    throttle_group_config(&blk->public.throttle_group_member, cfg);
}

void blk_io_limits_disable(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    assert(tgm->throttle_state);
    GLOBAL_STATE_CODE();
    if (bs) {
        bdrv_ref(bs);
        bdrv_drained_begin(bs);
    }
    throttle_group_unregister_tgm(tgm);
    if (bs) {
        bdrv_drained_end(bs);
        bdrv_unref(bs);
    }
}

/* should be called before blk_set_io_limits if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_group_member.throttle_state);
    GLOBAL_STATE_CODE();
    throttle_group_register_tgm(&blk->public.throttle_group_member,
                                group, blk_get_aio_context(blk));
}

void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
    GLOBAL_STATE_CODE();
    /* this BB is not part of any group */
    if (!blk->public.throttle_group_member.throttle_state) {
        return;
    }
    /* this BB is already part of the group we want */
    if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
                   group)) {
        return;
    }
    /* need to change the group this BB belongs to */
    blk_io_limits_disable(blk);
    blk_io_limits_enable(blk, group);
}
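
/*
 * Drained-section callbacks for the root child: the first drained_begin
 * notifies the device model and lifts I/O throttling, drained_poll reports
 * whether requests are still pending, and the last drained_end restarts any
 * requests that were queued while the backend was quiescent.
 */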
static void blk_root_drained_begin(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
        if (blk->dev_ops && blk->dev_ops->drained_begin) {
            blk->dev_ops->drained_begin(blk->dev_opaque);
        }
    }
    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */
    if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
        throttle_group_restart_tgm(tgm);
    }
}

static bool blk_root_drained_poll(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    bool busy = false;
    assert(qatomic_read(&blk->quiesce_counter));
    if (blk->dev_ops && blk->dev_ops->drained_poll) {
        busy = blk->dev_ops->drained_poll(blk->dev_opaque);
    }
    return busy || !!blk->in_flight;
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    assert(qatomic_read(&blk->quiesce_counter));
    assert(blk->public.throttle_group_member.io_limits_disabled);
    qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
    if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
        if (blk->dev_ops && blk->dev_ops->drained_end) {
            blk->dev_ops->drained_end(blk->dev_opaque);
        }
        qemu_mutex_lock(&blk->queued_requests_lock);
        while (qemu_co_enter_next(&blk->queued_requests,
                                  &blk->queued_requests_lock)) {
            /* Resume all queued requests */
        }
        qemu_mutex_unlock(&blk->queued_requests_lock);
    }
}

bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (bs) {
        return bdrv_register_buf(bs, host, size, errp);
    }
    return true;
}

void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
{
    BlockDriverState *bs = blk_bs(blk);
    GLOBAL_STATE_CODE();
    if (bs) {
        bdrv_unregister_buf(bs, host, size);
    }
}
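
/*
 * Offloaded copy between two backends.  Both the source and the destination
 * ranges are bounds-checked against their respective backends before the
 * request is forwarded to bdrv_co_copy_range().
 */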
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
                                   BlockBackend *blk_out, int64_t off_out,
                                   int64_t bytes, BdrvRequestFlags read_flags,
                                   BdrvRequestFlags write_flags)
{
    int r;
    IO_CODE();
    GRAPH_RDLOCK_GUARD();
    r = blk_check_byte_request(blk_in, off_in, bytes);
    if (r) {
        return r;
    }
    r = blk_check_byte_request(blk_out, off_out, bytes);
    if (r) {
        return r;
    }
    return bdrv_co_copy_range(blk_in->root, off_in,
                              blk_out->root, off_out,
                              bytes, read_flags, write_flags);
}

const BdrvChild *blk_root(BlockBackend *blk)
{
    GLOBAL_STATE_CODE();
    return blk->root;
}

int blk_make_empty(BlockBackend *blk, Error **errp)
{
    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();
    if (!blk_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }
    return bdrv_make_empty(blk->root, errp);
}