qcow2.c 208 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214
  1. /*
  2. * Block driver for the QCOW version 2 format
  3. *
  4. * Copyright (c) 2004-2006 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "block/qdict.h"
  26. #include "system/block-backend.h"
  27. #include "qemu/main-loop.h"
  28. #include "qemu/module.h"
  29. #include "qcow2.h"
  30. #include "qemu/error-report.h"
  31. #include "qapi/error.h"
  32. #include "qapi/qapi-events-block-core.h"
  33. #include "qapi/qmp/qdict.h"
  34. #include "qapi/qmp/qstring.h"
  35. #include "trace.h"
  36. #include "qemu/option_int.h"
  37. #include "qemu/cutils.h"
  38. #include "qemu/bswap.h"
  39. #include "qemu/memalign.h"
  40. #include "qapi/qobject-input-visitor.h"
  41. #include "qapi/qapi-visit-block-core.h"
  42. #include "crypto.h"
  43. #include "block/aio_task.h"
  44. #include "block/dirty-bitmap.h"
  45. /*
  46. Differences with QCOW:
  47. - Support for multiple incremental snapshots.
  48. - Memory management by reference counts.
  49. - Clusters which have a reference count of one have the bit
  50. QCOW_OFLAG_COPIED to optimize write performance.
  51. - Size of compressed clusters is stored in sectors to reduce bit usage
  52. in the cluster offsets.
  53. - Support for storing additional data (such as the VM state) in the
  54. snapshots.
  55. - If a backing store is used, the cluster size is not constrained
  56. (could be backported to QCOW).
  57. - L2 tables always have a size of one cluster.
  58. */
  59. typedef struct {
  60. uint32_t magic;
  61. uint32_t len;
  62. } QEMU_PACKED QCowExtension;
  /*
   * Magic values identifying the header extension types handled by this
   * driver.  An extension whose magic is QCOW2_EXT_MAGIC_END terminates the
   * extension list.
   */
  #define QCOW2_EXT_MAGIC_END             0
  #define QCOW2_EXT_MAGIC_BACKING_FORMAT  0xe2792aca
  #define QCOW2_EXT_MAGIC_FEATURE_TABLE   0x6803f857
  #define QCOW2_EXT_MAGIC_CRYPTO_HEADER   0x0537be77
  #define QCOW2_EXT_MAGIC_BITMAPS         0x23852875
  #define QCOW2_EXT_MAGIC_DATA_FILE       0x44415441  /* "DATA" in ASCII */
  69. static int coroutine_fn
  70. qcow2_co_preadv_compressed(BlockDriverState *bs,
  71. uint64_t l2_entry,
  72. uint64_t offset,
  73. uint64_t bytes,
  74. QEMUIOVector *qiov,
  75. size_t qiov_offset);
  76. static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
  77. {
  78. const QCowHeader *cow_header = (const void *)buf;
  79. if (buf_size >= sizeof(QCowHeader) &&
  80. be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
  81. be32_to_cpu(cow_header->version) >= 2)
  82. return 100;
  83. else
  84. return 0;
  85. }
  86. static int GRAPH_RDLOCK
  87. qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
  88. uint8_t *buf, size_t buflen,
  89. void *opaque, Error **errp)
  90. {
  91. BlockDriverState *bs = opaque;
  92. BDRVQcow2State *s = bs->opaque;
  93. ssize_t ret;
  94. if ((offset + buflen) > s->crypto_header.length) {
  95. error_setg(errp, "Request for data outside of extension header");
  96. return -1;
  97. }
  98. ret = bdrv_pread(bs->file, s->crypto_header.offset + offset, buflen, buf,
  99. 0);
  100. if (ret < 0) {
  101. error_setg_errno(errp, -ret, "Could not read encryption header");
  102. return -1;
  103. }
  104. return 0;
  105. }
  106. static int coroutine_fn GRAPH_RDLOCK
  107. qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque,
  108. Error **errp)
  109. {
  110. BlockDriverState *bs = opaque;
  111. BDRVQcow2State *s = bs->opaque;
  112. int64_t ret;
  113. int64_t clusterlen;
  114. ret = qcow2_alloc_clusters(bs, headerlen);
  115. if (ret < 0) {
  116. error_setg_errno(errp, -ret,
  117. "Cannot allocate cluster for LUKS header size %zu",
  118. headerlen);
  119. return -1;
  120. }
  121. s->crypto_header.length = headerlen;
  122. s->crypto_header.offset = ret;
  123. /*
  124. * Zero fill all space in cluster so it has predictable
  125. * content, as we may not initialize some regions of the
  126. * header (eg only 1 out of 8 key slots will be initialized)
  127. */
  128. clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
  129. assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
  130. ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0);
  131. if (ret < 0) {
  132. error_setg_errno(errp, -ret, "Could not zero fill encryption header");
  133. return -1;
  134. }
  135. return 0;
  136. }
  137. /* The graph lock must be held when called in coroutine context */
  138. static int coroutine_mixed_fn GRAPH_RDLOCK
  139. qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
  140. const uint8_t *buf, size_t buflen,
  141. void *opaque, Error **errp)
  142. {
  143. BlockDriverState *bs = opaque;
  144. BDRVQcow2State *s = bs->opaque;
  145. ssize_t ret;
  146. if ((offset + buflen) > s->crypto_header.length) {
  147. error_setg(errp, "Request for data outside of extension header");
  148. return -1;
  149. }
  150. ret = bdrv_pwrite(bs->file, s->crypto_header.offset + offset, buflen, buf,
  151. 0);
  152. if (ret < 0) {
  153. error_setg_errno(errp, -ret, "Could not read encryption header");
  154. return -1;
  155. }
  156. return 0;
  157. }
  158. static QDict*
  159. qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, Error **errp)
  160. {
  161. QDict *cryptoopts_qdict;
  162. QDict *opts_qdict;
  163. /* Extract "encrypt." options into a qdict */
  164. opts_qdict = qemu_opts_to_qdict(opts, NULL);
  165. qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
  166. qobject_unref(opts_qdict);
  167. qdict_put_str(cryptoopts_qdict, "format", fmt);
  168. return cryptoopts_qdict;
  169. }
  170. /*
  171. * read qcow2 extension and fill bs
  172. * start reading from start_offset
  173. * finish reading upon magic of value 0 or when end_offset reached
  174. * unknown magic is skipped (future extension this version knows nothing about)
  175. * return 0 upon success, non-0 otherwise
  176. */
  177. static int coroutine_fn GRAPH_RDLOCK
  178. qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
  179. uint64_t end_offset, void **p_feature_table,
  180. int flags, bool *need_update_header, Error **errp)
  181. {
  182. BDRVQcow2State *s = bs->opaque;
  183. QCowExtension ext;
  184. uint64_t offset;
  185. int ret;
  186. Qcow2BitmapHeaderExt bitmaps_ext;
  187. if (need_update_header != NULL) {
  188. *need_update_header = false;
  189. }
  190. #ifdef DEBUG_EXT
  191. printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
  192. #endif
  193. offset = start_offset;
  194. while (offset < end_offset) {
  195. #ifdef DEBUG_EXT
  196. /* Sanity check */
  197. if (offset > s->cluster_size)
  198. printf("qcow2_read_extension: suspicious offset %lu\n", offset);
  199. printf("attempting to read extended header in offset %lu\n", offset);
  200. #endif
  201. ret = bdrv_co_pread(bs->file, offset, sizeof(ext), &ext, 0);
  202. if (ret < 0) {
  203. error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
  204. "pread fail from offset %" PRIu64, offset);
  205. return 1;
  206. }
  207. ext.magic = be32_to_cpu(ext.magic);
  208. ext.len = be32_to_cpu(ext.len);
  209. offset += sizeof(ext);
  210. #ifdef DEBUG_EXT
  211. printf("ext.magic = 0x%x\n", ext.magic);
  212. #endif
  213. if (offset > end_offset || ext.len > end_offset - offset) {
  214. error_setg(errp, "Header extension too large");
  215. return -EINVAL;
  216. }
  217. switch (ext.magic) {
  218. case QCOW2_EXT_MAGIC_END:
  219. return 0;
  220. case QCOW2_EXT_MAGIC_BACKING_FORMAT:
  221. if (ext.len >= sizeof(bs->backing_format)) {
  222. error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
  223. " too large (>=%zu)", ext.len,
  224. sizeof(bs->backing_format));
  225. return 2;
  226. }
  227. ret = bdrv_co_pread(bs->file, offset, ext.len, bs->backing_format, 0);
  228. if (ret < 0) {
  229. error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
  230. "Could not read format name");
  231. return 3;
  232. }
  233. bs->backing_format[ext.len] = '\0';
  234. s->image_backing_format = g_strdup(bs->backing_format);
  235. #ifdef DEBUG_EXT
  236. printf("Qcow2: Got format extension %s\n", bs->backing_format);
  237. #endif
  238. break;
  239. case QCOW2_EXT_MAGIC_FEATURE_TABLE:
  240. if (p_feature_table != NULL) {
  241. void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
  242. ret = bdrv_co_pread(bs->file, offset, ext.len, feature_table, 0);
  243. if (ret < 0) {
  244. error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
  245. "Could not read table");
  246. g_free(feature_table);
  247. return ret;
  248. }
  249. *p_feature_table = feature_table;
  250. }
  251. break;
  252. case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
  253. unsigned int cflags = 0;
  254. if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
  255. error_setg(errp, "CRYPTO header extension only "
  256. "expected with LUKS encryption method");
  257. return -EINVAL;
  258. }
  259. if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
  260. error_setg(errp, "CRYPTO header extension size %u, "
  261. "but expected size %zu", ext.len,
  262. sizeof(Qcow2CryptoHeaderExtension));
  263. return -EINVAL;
  264. }
  265. ret = bdrv_co_pread(bs->file, offset, ext.len, &s->crypto_header, 0);
  266. if (ret < 0) {
  267. error_setg_errno(errp, -ret,
  268. "Unable to read CRYPTO header extension");
  269. return ret;
  270. }
  271. s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
  272. s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
  273. if ((s->crypto_header.offset % s->cluster_size) != 0) {
  274. error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
  275. "not a multiple of cluster size '%u'",
  276. s->crypto_header.offset, s->cluster_size);
  277. return -EINVAL;
  278. }
  279. if (flags & BDRV_O_NO_IO) {
  280. cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
  281. }
  282. s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
  283. qcow2_crypto_hdr_read_func,
  284. bs, cflags, errp);
  285. if (!s->crypto) {
  286. return -EINVAL;
  287. }
  288. } break;
  289. case QCOW2_EXT_MAGIC_BITMAPS:
  290. if (ext.len != sizeof(bitmaps_ext)) {
  291. error_setg_errno(errp, -ret, "bitmaps_ext: "
  292. "Invalid extension length");
  293. return -EINVAL;
  294. }
  295. if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
  296. if (s->qcow_version < 3) {
  297. /* Let's be a bit more specific */
  298. warn_report("This qcow2 v2 image contains bitmaps, but "
  299. "they may have been modified by a program "
  300. "without persistent bitmap support; so now "
  301. "they must all be considered inconsistent");
  302. } else {
  303. warn_report("a program lacking bitmap support "
  304. "modified this file, so all bitmaps are now "
  305. "considered inconsistent");
  306. }
  307. error_printf("Some clusters may be leaked, "
  308. "run 'qemu-img check -r' on the image "
  309. "file to fix.");
  310. if (need_update_header != NULL) {
  311. /* Updating is needed to drop invalid bitmap extension. */
  312. *need_update_header = true;
  313. }
  314. break;
  315. }
  316. ret = bdrv_co_pread(bs->file, offset, ext.len, &bitmaps_ext, 0);
  317. if (ret < 0) {
  318. error_setg_errno(errp, -ret, "bitmaps_ext: "
  319. "Could not read ext header");
  320. return ret;
  321. }
  322. if (bitmaps_ext.reserved32 != 0) {
  323. error_setg_errno(errp, -ret, "bitmaps_ext: "
  324. "Reserved field is not zero");
  325. return -EINVAL;
  326. }
  327. bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
  328. bitmaps_ext.bitmap_directory_size =
  329. be64_to_cpu(bitmaps_ext.bitmap_directory_size);
  330. bitmaps_ext.bitmap_directory_offset =
  331. be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
  332. if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
  333. error_setg(errp,
  334. "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
  335. "exceeding the QEMU supported maximum of %d",
  336. bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
  337. return -EINVAL;
  338. }
  339. if (bitmaps_ext.nb_bitmaps == 0) {
  340. error_setg(errp, "found bitmaps extension with zero bitmaps");
  341. return -EINVAL;
  342. }
  343. if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) {
  344. error_setg(errp, "bitmaps_ext: "
  345. "invalid bitmap directory offset");
  346. return -EINVAL;
  347. }
  348. if (bitmaps_ext.bitmap_directory_size >
  349. QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
  350. error_setg(errp, "bitmaps_ext: "
  351. "bitmap directory size (%" PRIu64 ") exceeds "
  352. "the maximum supported size (%d)",
  353. bitmaps_ext.bitmap_directory_size,
  354. QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
  355. return -EINVAL;
  356. }
  357. s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
  358. s->bitmap_directory_offset =
  359. bitmaps_ext.bitmap_directory_offset;
  360. s->bitmap_directory_size =
  361. bitmaps_ext.bitmap_directory_size;
  362. #ifdef DEBUG_EXT
  363. printf("Qcow2: Got bitmaps extension: "
  364. "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
  365. s->bitmap_directory_offset, s->nb_bitmaps);
  366. #endif
  367. break;
  368. case QCOW2_EXT_MAGIC_DATA_FILE:
  369. {
  370. s->image_data_file = g_malloc0(ext.len + 1);
  371. ret = bdrv_co_pread(bs->file, offset, ext.len, s->image_data_file, 0);
  372. if (ret < 0) {
  373. error_setg_errno(errp, -ret,
  374. "ERROR: Could not read data file name");
  375. return ret;
  376. }
  377. #ifdef DEBUG_EXT
  378. printf("Qcow2: Got external data file %s\n", s->image_data_file);
  379. #endif
  380. break;
  381. }
  382. default:
  383. /* unknown magic - save it in case we need to rewrite the header */
  384. /* If you add a new feature, make sure to also update the fast
  385. * path of qcow2_make_empty() to deal with it. */
  386. {
  387. Qcow2UnknownHeaderExtension *uext;
  388. uext = g_malloc0(sizeof(*uext) + ext.len);
  389. uext->magic = ext.magic;
  390. uext->len = ext.len;
  391. QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
  392. ret = bdrv_co_pread(bs->file, offset, uext->len, uext->data, 0);
  393. if (ret < 0) {
  394. error_setg_errno(errp, -ret, "ERROR: unknown extension: "
  395. "Could not read data");
  396. return ret;
  397. }
  398. }
  399. break;
  400. }
  401. offset += ((ext.len + 7) & ~7);
  402. }
  403. return 0;
  404. }
  405. static void cleanup_unknown_header_ext(BlockDriverState *bs)
  406. {
  407. BDRVQcow2State *s = bs->opaque;
  408. Qcow2UnknownHeaderExtension *uext, *next;
  409. QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
  410. QLIST_REMOVE(uext, next);
  411. g_free(uext);
  412. }
  413. }
  414. static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
  415. uint64_t mask)
  416. {
  417. g_autoptr(GString) features = g_string_sized_new(60);
  418. while (table && table->name[0] != '\0') {
  419. if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
  420. if (mask & (1ULL << table->bit)) {
  421. if (features->len > 0) {
  422. g_string_append(features, ", ");
  423. }
  424. g_string_append_printf(features, "%.46s", table->name);
  425. mask &= ~(1ULL << table->bit);
  426. }
  427. }
  428. table++;
  429. }
  430. if (mask) {
  431. if (features->len > 0) {
  432. g_string_append(features, ", ");
  433. }
  434. g_string_append_printf(features,
  435. "Unknown incompatible feature: %" PRIx64, mask);
  436. }
  437. error_setg(errp, "Unsupported qcow2 feature(s): %s", features->str);
  438. }
  439. /*
  440. * Sets the dirty bit and flushes afterwards if necessary.
  441. *
  442. * The incompatible_features bit is only set if the image file header was
  443. * updated successfully. Therefore it is not required to check the return
  444. * value of this function.
  445. */
  446. int qcow2_mark_dirty(BlockDriverState *bs)
  447. {
  448. BDRVQcow2State *s = bs->opaque;
  449. uint64_t val;
  450. int ret;
  451. assert(s->qcow_version >= 3);
  452. if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
  453. return 0; /* already dirty */
  454. }
  455. val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
  456. ret = bdrv_pwrite_sync(bs->file,
  457. offsetof(QCowHeader, incompatible_features),
  458. sizeof(val), &val, 0);
  459. if (ret < 0) {
  460. return ret;
  461. }
  462. /* Only treat image as dirty if the header was updated successfully */
  463. s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
  464. return 0;
  465. }
  466. /*
  467. * Clears the dirty bit and flushes before if necessary. Only call this
  468. * function when there are no pending requests, it does not guard against
  469. * concurrent requests dirtying the image.
  470. */
  471. static int GRAPH_RDLOCK qcow2_mark_clean(BlockDriverState *bs)
  472. {
  473. BDRVQcow2State *s = bs->opaque;
  474. if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
  475. int ret;
  476. s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
  477. ret = qcow2_flush_caches(bs);
  478. if (ret < 0) {
  479. return ret;
  480. }
  481. return qcow2_update_header(bs);
  482. }
  483. return 0;
  484. }
  485. /*
  486. * Marks the image as corrupt.
  487. */
  488. int qcow2_mark_corrupt(BlockDriverState *bs)
  489. {
  490. BDRVQcow2State *s = bs->opaque;
  491. s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
  492. return qcow2_update_header(bs);
  493. }
  494. /*
  495. * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
  496. * before if necessary.
  497. */
  498. static int coroutine_fn GRAPH_RDLOCK
  499. qcow2_mark_consistent(BlockDriverState *bs)
  500. {
  501. BDRVQcow2State *s = bs->opaque;
  502. if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
  503. int ret = qcow2_flush_caches(bs);
  504. if (ret < 0) {
  505. return ret;
  506. }
  507. s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
  508. return qcow2_update_header(bs);
  509. }
  510. return 0;
  511. }
  512. static void qcow2_add_check_result(BdrvCheckResult *out,
  513. const BdrvCheckResult *src,
  514. bool set_allocation_info)
  515. {
  516. out->corruptions += src->corruptions;
  517. out->leaks += src->leaks;
  518. out->check_errors += src->check_errors;
  519. out->corruptions_fixed += src->corruptions_fixed;
  520. out->leaks_fixed += src->leaks_fixed;
  521. if (set_allocation_info) {
  522. out->image_end_offset = src->image_end_offset;
  523. out->bfi = src->bfi;
  524. }
  525. }
  526. static int coroutine_fn GRAPH_RDLOCK
  527. qcow2_co_check_locked(BlockDriverState *bs, BdrvCheckResult *result,
  528. BdrvCheckMode fix)
  529. {
  530. BdrvCheckResult snapshot_res = {};
  531. BdrvCheckResult refcount_res = {};
  532. int ret;
  533. memset(result, 0, sizeof(*result));
  534. ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
  535. if (ret < 0) {
  536. qcow2_add_check_result(result, &snapshot_res, false);
  537. return ret;
  538. }
  539. ret = qcow2_check_refcounts(bs, &refcount_res, fix);
  540. qcow2_add_check_result(result, &refcount_res, true);
  541. if (ret < 0) {
  542. qcow2_add_check_result(result, &snapshot_res, false);
  543. return ret;
  544. }
  545. ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
  546. qcow2_add_check_result(result, &snapshot_res, false);
  547. if (ret < 0) {
  548. return ret;
  549. }
  550. if (fix && result->check_errors == 0 && result->corruptions == 0) {
  551. ret = qcow2_mark_clean(bs);
  552. if (ret < 0) {
  553. return ret;
  554. }
  555. return qcow2_mark_consistent(bs);
  556. }
  557. return ret;
  558. }
  559. static int coroutine_fn GRAPH_RDLOCK
  560. qcow2_co_check(BlockDriverState *bs, BdrvCheckResult *result,
  561. BdrvCheckMode fix)
  562. {
  563. BDRVQcow2State *s = bs->opaque;
  564. int ret;
  565. qemu_co_mutex_lock(&s->lock);
  566. ret = qcow2_co_check_locked(bs, result, fix);
  567. qemu_co_mutex_unlock(&s->lock);
  568. return ret;
  569. }
  570. int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
  571. uint64_t entries, size_t entry_len,
  572. int64_t max_size_bytes, const char *table_name,
  573. Error **errp)
  574. {
  575. BDRVQcow2State *s = bs->opaque;
  576. if (entries > max_size_bytes / entry_len) {
  577. error_setg(errp, "%s too large", table_name);
  578. return -EFBIG;
  579. }
  580. /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
  581. * because values will be passed to qemu functions taking int64_t. */
  582. if ((INT64_MAX - entries * entry_len < offset) ||
  583. (offset_into_cluster(s, offset) != 0)) {
  584. error_setg(errp, "%s offset invalid", table_name);
  585. return -EINVAL;
  586. }
  587. return 0;
  588. }
  589. static const char *const mutable_opts[] = {
  590. QCOW2_OPT_LAZY_REFCOUNTS,
  591. QCOW2_OPT_DISCARD_REQUEST,
  592. QCOW2_OPT_DISCARD_SNAPSHOT,
  593. QCOW2_OPT_DISCARD_OTHER,
  594. QCOW2_OPT_DISCARD_NO_UNREF,
  595. QCOW2_OPT_OVERLAP,
  596. QCOW2_OPT_OVERLAP_TEMPLATE,
  597. QCOW2_OPT_OVERLAP_MAIN_HEADER,
  598. QCOW2_OPT_OVERLAP_ACTIVE_L1,
  599. QCOW2_OPT_OVERLAP_ACTIVE_L2,
  600. QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
  601. QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
  602. QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
  603. QCOW2_OPT_OVERLAP_INACTIVE_L1,
  604. QCOW2_OPT_OVERLAP_INACTIVE_L2,
  605. QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
  606. QCOW2_OPT_CACHE_SIZE,
  607. QCOW2_OPT_L2_CACHE_SIZE,
  608. QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
  609. QCOW2_OPT_REFCOUNT_CACHE_SIZE,
  610. QCOW2_OPT_CACHE_CLEAN_INTERVAL,
  611. NULL
  612. };
  613. static QemuOptsList qcow2_runtime_opts = {
  614. .name = "qcow2",
  615. .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
  616. .desc = {
  617. {
  618. .name = QCOW2_OPT_LAZY_REFCOUNTS,
  619. .type = QEMU_OPT_BOOL,
  620. .help = "Postpone refcount updates",
  621. },
  622. {
  623. .name = QCOW2_OPT_DISCARD_REQUEST,
  624. .type = QEMU_OPT_BOOL,
  625. .help = "Pass guest discard requests to the layer below",
  626. },
  627. {
  628. .name = QCOW2_OPT_DISCARD_SNAPSHOT,
  629. .type = QEMU_OPT_BOOL,
  630. .help = "Generate discard requests when snapshot related space "
  631. "is freed",
  632. },
  633. {
  634. .name = QCOW2_OPT_DISCARD_OTHER,
  635. .type = QEMU_OPT_BOOL,
  636. .help = "Generate discard requests when other clusters are freed",
  637. },
  638. {
  639. .name = QCOW2_OPT_DISCARD_NO_UNREF,
  640. .type = QEMU_OPT_BOOL,
  641. .help = "Do not unreference discarded clusters",
  642. },
  643. {
  644. .name = QCOW2_OPT_OVERLAP,
  645. .type = QEMU_OPT_STRING,
  646. .help = "Selects which overlap checks to perform from a range of "
  647. "templates (none, constant, cached, all)",
  648. },
  649. {
  650. .name = QCOW2_OPT_OVERLAP_TEMPLATE,
  651. .type = QEMU_OPT_STRING,
  652. .help = "Selects which overlap checks to perform from a range of "
  653. "templates (none, constant, cached, all)",
  654. },
  655. {
  656. .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
  657. .type = QEMU_OPT_BOOL,
  658. .help = "Check for unintended writes into the main qcow2 header",
  659. },
  660. {
  661. .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
  662. .type = QEMU_OPT_BOOL,
  663. .help = "Check for unintended writes into the active L1 table",
  664. },
  665. {
  666. .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
  667. .type = QEMU_OPT_BOOL,
  668. .help = "Check for unintended writes into an active L2 table",
  669. },
  670. {
  671. .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
  672. .type = QEMU_OPT_BOOL,
  673. .help = "Check for unintended writes into the refcount table",
  674. },
  675. {
  676. .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
  677. .type = QEMU_OPT_BOOL,
  678. .help = "Check for unintended writes into a refcount block",
  679. },
  680. {
  681. .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
  682. .type = QEMU_OPT_BOOL,
  683. .help = "Check for unintended writes into the snapshot table",
  684. },
  685. {
  686. .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
  687. .type = QEMU_OPT_BOOL,
  688. .help = "Check for unintended writes into an inactive L1 table",
  689. },
  690. {
  691. .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
  692. .type = QEMU_OPT_BOOL,
  693. .help = "Check for unintended writes into an inactive L2 table",
  694. },
  695. {
  696. .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
  697. .type = QEMU_OPT_BOOL,
  698. .help = "Check for unintended writes into the bitmap directory",
  699. },
  700. {
  701. .name = QCOW2_OPT_CACHE_SIZE,
  702. .type = QEMU_OPT_SIZE,
  703. .help = "Maximum combined metadata (L2 tables and refcount blocks) "
  704. "cache size",
  705. },
  706. {
  707. .name = QCOW2_OPT_L2_CACHE_SIZE,
  708. .type = QEMU_OPT_SIZE,
  709. .help = "Maximum L2 table cache size",
  710. },
  711. {
  712. .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
  713. .type = QEMU_OPT_SIZE,
  714. .help = "Size of each entry in the L2 cache",
  715. },
  716. {
  717. .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
  718. .type = QEMU_OPT_SIZE,
  719. .help = "Maximum refcount block cache size",
  720. },
  721. {
  722. .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
  723. .type = QEMU_OPT_NUMBER,
  724. .help = "Clean unused cache entries after this time (in seconds)",
  725. },
  726. BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
  727. "ID of secret providing qcow2 AES key or LUKS passphrase"),
  728. { /* end of list */ }
  729. },
  730. };
  731. static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
  732. [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
  733. [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
  734. [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
  735. [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
  736. [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
  737. [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
  738. [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
  739. [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
  740. [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
  741. };
  742. static void cache_clean_timer_cb(void *opaque)
  743. {
  744. BlockDriverState *bs = opaque;
  745. BDRVQcow2State *s = bs->opaque;
  746. qcow2_cache_clean_unused(s->l2_table_cache);
  747. qcow2_cache_clean_unused(s->refcount_block_cache);
  748. timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
  749. (int64_t) s->cache_clean_interval * 1000);
  750. }
  751. static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
  752. {
  753. BDRVQcow2State *s = bs->opaque;
  754. if (s->cache_clean_interval > 0) {
  755. s->cache_clean_timer =
  756. aio_timer_new_with_attrs(context, QEMU_CLOCK_VIRTUAL,
  757. SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL,
  758. cache_clean_timer_cb, bs);
  759. timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
  760. (int64_t) s->cache_clean_interval * 1000);
  761. }
  762. }
  763. static void cache_clean_timer_del(BlockDriverState *bs)
  764. {
  765. BDRVQcow2State *s = bs->opaque;
  766. if (s->cache_clean_timer) {
  767. timer_free(s->cache_clean_timer);
  768. s->cache_clean_timer = NULL;
  769. }
  770. }
  771. static void qcow2_detach_aio_context(BlockDriverState *bs)
  772. {
  773. cache_clean_timer_del(bs);
  774. }
  775. static void qcow2_attach_aio_context(BlockDriverState *bs,
  776. AioContext *new_context)
  777. {
  778. cache_clean_timer_init(bs, new_context);
  779. }
  780. static bool read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
  781. uint64_t *l2_cache_size,
  782. uint64_t *l2_cache_entry_size,
  783. uint64_t *refcount_cache_size, Error **errp)
  784. {
  785. BDRVQcow2State *s = bs->opaque;
  786. uint64_t combined_cache_size, l2_cache_max_setting;
  787. bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
  788. bool l2_cache_entry_size_set;
  789. int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
  790. uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
  791. uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
  792. /* An L2 table is always one cluster in size so the max cache size
  793. * should be a multiple of the cluster size. */
  794. uint64_t max_l2_cache = ROUND_UP(max_l2_entries * l2_entry_size(s),
  795. s->cluster_size);
  796. combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
  797. l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
  798. refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
  799. l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
  800. combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
  801. l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
  802. DEFAULT_L2_CACHE_MAX_SIZE);
  803. *refcount_cache_size = qemu_opt_get_size(opts,
  804. QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
  805. *l2_cache_entry_size = qemu_opt_get_size(
  806. opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
  807. *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
  808. if (combined_cache_size_set) {
  809. if (l2_cache_size_set && refcount_cache_size_set) {
  810. error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
  811. " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
  812. "at the same time");
  813. return false;
  814. } else if (l2_cache_size_set &&
  815. (l2_cache_max_setting > combined_cache_size)) {
  816. error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
  817. QCOW2_OPT_CACHE_SIZE);
  818. return false;
  819. } else if (*refcount_cache_size > combined_cache_size) {
  820. error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
  821. QCOW2_OPT_CACHE_SIZE);
  822. return false;
  823. }
  824. if (l2_cache_size_set) {
  825. *refcount_cache_size = combined_cache_size - *l2_cache_size;
  826. } else if (refcount_cache_size_set) {
  827. *l2_cache_size = combined_cache_size - *refcount_cache_size;
  828. } else {
  829. /* Assign as much memory as possible to the L2 cache, and
  830. * use the remainder for the refcount cache */
  831. if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
  832. *l2_cache_size = max_l2_cache;
  833. *refcount_cache_size = combined_cache_size - *l2_cache_size;
  834. } else {
  835. *refcount_cache_size =
  836. MIN(combined_cache_size, min_refcount_cache);
  837. *l2_cache_size = combined_cache_size - *refcount_cache_size;
  838. }
  839. }
  840. }
  841. /*
  842. * If the L2 cache is not enough to cover the whole disk then
  843. * default to 4KB entries. Smaller entries reduce the cost of
  844. * loads and evictions and increase I/O performance.
  845. */
  846. if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
  847. *l2_cache_entry_size = MIN(s->cluster_size, 4096);
  848. }
  849. /* l2_cache_size and refcount_cache_size are ensured to have at least
  850. * their minimum values in qcow2_update_options_prepare() */
  851. if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
  852. *l2_cache_entry_size > s->cluster_size ||
  853. !is_power_of_2(*l2_cache_entry_size)) {
  854. error_setg(errp, "L2 cache entry size must be a power of two "
  855. "between %d and the cluster size (%d)",
  856. 1 << MIN_CLUSTER_BITS, s->cluster_size);
  857. return false;
  858. }
  859. return true;
  860. }
  861. typedef struct Qcow2ReopenState {
  862. Qcow2Cache *l2_table_cache;
  863. Qcow2Cache *refcount_block_cache;
  864. int l2_slice_size; /* Number of entries in a slice of the L2 table */
  865. bool use_lazy_refcounts;
  866. int overlap_check;
  867. bool discard_passthrough[QCOW2_DISCARD_MAX];
  868. bool discard_no_unref;
  869. uint64_t cache_clean_interval;
  870. QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
  871. } Qcow2ReopenState;
  872. static int GRAPH_RDLOCK
  873. qcow2_update_options_prepare(BlockDriverState *bs, Qcow2ReopenState *r,
  874. QDict *options, int flags, Error **errp)
  875. {
  876. BDRVQcow2State *s = bs->opaque;
  877. QemuOpts *opts = NULL;
  878. const char *opt_overlap_check, *opt_overlap_check_template;
  879. int overlap_check_template = 0;
  880. uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
  881. int i;
  882. const char *encryptfmt;
  883. QDict *encryptopts = NULL;
  884. int ret;
  885. qdict_extract_subqdict(options, &encryptopts, "encrypt.");
  886. encryptfmt = qdict_get_try_str(encryptopts, "format");
  887. opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
  888. if (!qemu_opts_absorb_qdict(opts, options, errp)) {
  889. ret = -EINVAL;
  890. goto fail;
  891. }
  892. /* get L2 table/refcount block cache size from command line options */
  893. if (!read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
  894. &refcount_cache_size, errp)) {
  895. ret = -EINVAL;
  896. goto fail;
  897. }
  898. l2_cache_size /= l2_cache_entry_size;
  899. if (l2_cache_size < MIN_L2_CACHE_SIZE) {
  900. l2_cache_size = MIN_L2_CACHE_SIZE;
  901. }
  902. if (l2_cache_size > INT_MAX) {
  903. error_setg(errp, "L2 cache size too big");
  904. ret = -EINVAL;
  905. goto fail;
  906. }
  907. refcount_cache_size /= s->cluster_size;
  908. if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
  909. refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
  910. }
  911. if (refcount_cache_size > INT_MAX) {
  912. error_setg(errp, "Refcount cache size too big");
  913. ret = -EINVAL;
  914. goto fail;
  915. }
  916. /* alloc new L2 table/refcount block cache, flush old one */
  917. if (s->l2_table_cache) {
  918. ret = qcow2_cache_flush(bs, s->l2_table_cache);
  919. if (ret) {
  920. error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
  921. goto fail;
  922. }
  923. }
  924. if (s->refcount_block_cache) {
  925. ret = qcow2_cache_flush(bs, s->refcount_block_cache);
  926. if (ret) {
  927. error_setg_errno(errp, -ret,
  928. "Failed to flush the refcount block cache");
  929. goto fail;
  930. }
  931. }
  932. r->l2_slice_size = l2_cache_entry_size / l2_entry_size(s);
  933. r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
  934. l2_cache_entry_size);
  935. r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
  936. s->cluster_size);
  937. if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
  938. error_setg(errp, "Could not allocate metadata caches");
  939. ret = -ENOMEM;
  940. goto fail;
  941. }
  942. /* New interval for cache cleanup timer */
  943. r->cache_clean_interval =
  944. qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
  945. DEFAULT_CACHE_CLEAN_INTERVAL);
  946. #ifndef CONFIG_LINUX
  947. if (r->cache_clean_interval != 0) {
  948. error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
  949. " not supported on this host");
  950. ret = -EINVAL;
  951. goto fail;
  952. }
  953. #endif
  954. if (r->cache_clean_interval > UINT_MAX) {
  955. error_setg(errp, "Cache clean interval too big");
  956. ret = -EINVAL;
  957. goto fail;
  958. }
  959. /* lazy-refcounts; flush if going from enabled to disabled */
  960. r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
  961. (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
  962. if (r->use_lazy_refcounts && s->qcow_version < 3) {
  963. error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
  964. "qemu 1.1 compatibility level");
  965. ret = -EINVAL;
  966. goto fail;
  967. }
  968. if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
  969. ret = qcow2_mark_clean(bs);
  970. if (ret < 0) {
  971. error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
  972. goto fail;
  973. }
  974. }
  975. /* Overlap check options */
  976. opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
  977. opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
  978. if (opt_overlap_check_template && opt_overlap_check &&
  979. strcmp(opt_overlap_check_template, opt_overlap_check))
  980. {
  981. error_setg(errp, "Conflicting values for qcow2 options '"
  982. QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
  983. "' ('%s')", opt_overlap_check, opt_overlap_check_template);
  984. ret = -EINVAL;
  985. goto fail;
  986. }
  987. if (!opt_overlap_check) {
  988. opt_overlap_check = opt_overlap_check_template ?: "cached";
  989. }
  990. if (!strcmp(opt_overlap_check, "none")) {
  991. overlap_check_template = 0;
  992. } else if (!strcmp(opt_overlap_check, "constant")) {
  993. overlap_check_template = QCOW2_OL_CONSTANT;
  994. } else if (!strcmp(opt_overlap_check, "cached")) {
  995. overlap_check_template = QCOW2_OL_CACHED;
  996. } else if (!strcmp(opt_overlap_check, "all")) {
  997. overlap_check_template = QCOW2_OL_ALL;
  998. } else {
  999. error_setg(errp, "Unsupported value '%s' for qcow2 option "
  1000. "'overlap-check'. Allowed are any of the following: "
  1001. "none, constant, cached, all", opt_overlap_check);
  1002. ret = -EINVAL;
  1003. goto fail;
  1004. }
  1005. r->overlap_check = 0;
  1006. for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
  1007. /* overlap-check defines a template bitmask, but every flag may be
  1008. * overwritten through the associated boolean option */
  1009. r->overlap_check |=
  1010. qemu_opt_get_bool(opts, overlap_bool_option_names[i],
  1011. overlap_check_template & (1 << i)) << i;
  1012. }
  1013. r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
  1014. r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
  1015. r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
  1016. qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
  1017. flags & BDRV_O_UNMAP);
  1018. r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
  1019. qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
  1020. r->discard_passthrough[QCOW2_DISCARD_OTHER] =
  1021. qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
  1022. r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
  1023. false);
  1024. if (r->discard_no_unref && s->qcow_version < 3) {
  1025. error_setg(errp,
  1026. "discard-no-unref is only supported since qcow2 version 3");
  1027. ret = -EINVAL;
  1028. goto fail;
  1029. }
  1030. switch (s->crypt_method_header) {
  1031. case QCOW_CRYPT_NONE:
  1032. if (encryptfmt) {
  1033. error_setg(errp, "No encryption in image header, but options "
  1034. "specified format '%s'", encryptfmt);
  1035. ret = -EINVAL;
  1036. goto fail;
  1037. }
  1038. break;
  1039. case QCOW_CRYPT_AES:
  1040. if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
  1041. error_setg(errp,
  1042. "Header reported 'aes' encryption format but "
  1043. "options specify '%s'", encryptfmt);
  1044. ret = -EINVAL;
  1045. goto fail;
  1046. }
  1047. qdict_put_str(encryptopts, "format", "qcow");
  1048. r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
  1049. if (!r->crypto_opts) {
  1050. ret = -EINVAL;
  1051. goto fail;
  1052. }
  1053. break;
  1054. case QCOW_CRYPT_LUKS:
  1055. if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
  1056. error_setg(errp,
  1057. "Header reported 'luks' encryption format but "
  1058. "options specify '%s'", encryptfmt);
  1059. ret = -EINVAL;
  1060. goto fail;
  1061. }
  1062. qdict_put_str(encryptopts, "format", "luks");
  1063. r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
  1064. if (!r->crypto_opts) {
  1065. ret = -EINVAL;
  1066. goto fail;
  1067. }
  1068. break;
  1069. default:
  1070. error_setg(errp, "Unsupported encryption method %d",
  1071. s->crypt_method_header);
  1072. ret = -EINVAL;
  1073. goto fail;
  1074. }
  1075. ret = 0;
  1076. fail:
  1077. qobject_unref(encryptopts);
  1078. qemu_opts_del(opts);
  1079. opts = NULL;
  1080. return ret;
  1081. }
  1082. static void qcow2_update_options_commit(BlockDriverState *bs,
  1083. Qcow2ReopenState *r)
  1084. {
  1085. BDRVQcow2State *s = bs->opaque;
  1086. int i;
  1087. if (s->l2_table_cache) {
  1088. qcow2_cache_destroy(s->l2_table_cache);
  1089. }
  1090. if (s->refcount_block_cache) {
  1091. qcow2_cache_destroy(s->refcount_block_cache);
  1092. }
  1093. s->l2_table_cache = r->l2_table_cache;
  1094. s->refcount_block_cache = r->refcount_block_cache;
  1095. s->l2_slice_size = r->l2_slice_size;
  1096. s->overlap_check = r->overlap_check;
  1097. s->use_lazy_refcounts = r->use_lazy_refcounts;
  1098. for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
  1099. s->discard_passthrough[i] = r->discard_passthrough[i];
  1100. }
  1101. s->discard_no_unref = r->discard_no_unref;
  1102. if (s->cache_clean_interval != r->cache_clean_interval) {
  1103. cache_clean_timer_del(bs);
  1104. s->cache_clean_interval = r->cache_clean_interval;
  1105. cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
  1106. }
  1107. qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
  1108. s->crypto_opts = r->crypto_opts;
  1109. }
  1110. static void qcow2_update_options_abort(BlockDriverState *bs,
  1111. Qcow2ReopenState *r)
  1112. {
  1113. if (r->l2_table_cache) {
  1114. qcow2_cache_destroy(r->l2_table_cache);
  1115. }
  1116. if (r->refcount_block_cache) {
  1117. qcow2_cache_destroy(r->refcount_block_cache);
  1118. }
  1119. qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
  1120. }
  1121. static int coroutine_fn GRAPH_RDLOCK
  1122. qcow2_update_options(BlockDriverState *bs, QDict *options, int flags,
  1123. Error **errp)
  1124. {
  1125. Qcow2ReopenState r = {};
  1126. int ret;
  1127. ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
  1128. if (ret >= 0) {
  1129. qcow2_update_options_commit(bs, &r);
  1130. } else {
  1131. qcow2_update_options_abort(bs, &r);
  1132. }
  1133. return ret;
  1134. }
  1135. static int validate_compression_type(BDRVQcow2State *s, Error **errp)
  1136. {
  1137. switch (s->compression_type) {
  1138. case QCOW2_COMPRESSION_TYPE_ZLIB:
  1139. #ifdef CONFIG_ZSTD
  1140. case QCOW2_COMPRESSION_TYPE_ZSTD:
  1141. #endif
  1142. break;
  1143. default:
  1144. error_setg(errp, "qcow2: unknown compression type: %u",
  1145. s->compression_type);
  1146. return -ENOTSUP;
  1147. }
  1148. /*
  1149. * if the compression type differs from QCOW2_COMPRESSION_TYPE_ZLIB
  1150. * the incompatible feature flag must be set
  1151. */
  1152. if (s->compression_type == QCOW2_COMPRESSION_TYPE_ZLIB) {
  1153. if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) {
  1154. error_setg(errp, "qcow2: Compression type incompatible feature "
  1155. "bit must not be set");
  1156. return -EINVAL;
  1157. }
  1158. } else {
  1159. if (!(s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION)) {
  1160. error_setg(errp, "qcow2: Compression type incompatible feature "
  1161. "bit must be set");
  1162. return -EINVAL;
  1163. }
  1164. }
  1165. return 0;
  1166. }
  1167. /* Called with s->lock held. */
  1168. static int coroutine_fn GRAPH_RDLOCK
  1169. qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
  1170. bool open_data_file, Error **errp)
  1171. {
  1172. ERRP_GUARD();
  1173. BDRVQcow2State *s = bs->opaque;
  1174. unsigned int len, i;
  1175. int ret = 0;
  1176. QCowHeader header;
  1177. uint64_t ext_end;
  1178. uint64_t l1_vm_state_index;
  1179. bool update_header = false;
  1180. ret = bdrv_co_pread(bs->file, 0, sizeof(header), &header, 0);
  1181. if (ret < 0) {
  1182. error_setg_errno(errp, -ret, "Could not read qcow2 header");
  1183. goto fail;
  1184. }
  1185. header.magic = be32_to_cpu(header.magic);
  1186. header.version = be32_to_cpu(header.version);
  1187. header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
  1188. header.backing_file_size = be32_to_cpu(header.backing_file_size);
  1189. header.size = be64_to_cpu(header.size);
  1190. header.cluster_bits = be32_to_cpu(header.cluster_bits);
  1191. header.crypt_method = be32_to_cpu(header.crypt_method);
  1192. header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
  1193. header.l1_size = be32_to_cpu(header.l1_size);
  1194. header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
  1195. header.refcount_table_clusters =
  1196. be32_to_cpu(header.refcount_table_clusters);
  1197. header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
  1198. header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
  1199. if (header.magic != QCOW_MAGIC) {
  1200. error_setg(errp, "Image is not in qcow2 format");
  1201. ret = -EINVAL;
  1202. goto fail;
  1203. }
  1204. if (header.version < 2 || header.version > 3) {
  1205. error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
  1206. ret = -ENOTSUP;
  1207. goto fail;
  1208. }
  1209. s->qcow_version = header.version;
  1210. /* Initialise cluster size */
  1211. if (header.cluster_bits < MIN_CLUSTER_BITS ||
  1212. header.cluster_bits > MAX_CLUSTER_BITS) {
  1213. error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
  1214. header.cluster_bits);
  1215. ret = -EINVAL;
  1216. goto fail;
  1217. }
  1218. s->cluster_bits = header.cluster_bits;
  1219. s->cluster_size = 1 << s->cluster_bits;
  1220. /* Initialise version 3 header fields */
  1221. if (header.version == 2) {
  1222. header.incompatible_features = 0;
  1223. header.compatible_features = 0;
  1224. header.autoclear_features = 0;
  1225. header.refcount_order = 4;
  1226. header.header_length = 72;
  1227. } else {
  1228. header.incompatible_features =
  1229. be64_to_cpu(header.incompatible_features);
  1230. header.compatible_features = be64_to_cpu(header.compatible_features);
  1231. header.autoclear_features = be64_to_cpu(header.autoclear_features);
  1232. header.refcount_order = be32_to_cpu(header.refcount_order);
  1233. header.header_length = be32_to_cpu(header.header_length);
  1234. if (header.header_length < 104) {
  1235. error_setg(errp, "qcow2 header too short");
  1236. ret = -EINVAL;
  1237. goto fail;
  1238. }
  1239. }
  1240. if (header.header_length > s->cluster_size) {
  1241. error_setg(errp, "qcow2 header exceeds cluster size");
  1242. ret = -EINVAL;
  1243. goto fail;
  1244. }
  1245. if (header.header_length > sizeof(header)) {
  1246. s->unknown_header_fields_size = header.header_length - sizeof(header);
  1247. s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
  1248. ret = bdrv_co_pread(bs->file, sizeof(header),
  1249. s->unknown_header_fields_size,
  1250. s->unknown_header_fields, 0);
  1251. if (ret < 0) {
  1252. error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
  1253. "fields");
  1254. goto fail;
  1255. }
  1256. }
  1257. if (header.backing_file_offset > s->cluster_size) {
  1258. error_setg(errp, "Invalid backing file offset");
  1259. ret = -EINVAL;
  1260. goto fail;
  1261. }
  1262. if (header.backing_file_offset) {
  1263. ext_end = header.backing_file_offset;
  1264. } else {
  1265. ext_end = 1 << header.cluster_bits;
  1266. }
  1267. /* Handle feature bits */
  1268. s->incompatible_features = header.incompatible_features;
  1269. s->compatible_features = header.compatible_features;
  1270. s->autoclear_features = header.autoclear_features;
  1271. /*
  1272. * Handle compression type
  1273. * Older qcow2 images don't contain the compression type header.
  1274. * Distinguish them by the header length and use
  1275. * the only valid (default) compression type in that case
  1276. */
  1277. if (header.header_length > offsetof(QCowHeader, compression_type)) {
  1278. s->compression_type = header.compression_type;
  1279. } else {
  1280. s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
  1281. }
  1282. ret = validate_compression_type(s, errp);
  1283. if (ret) {
  1284. goto fail;
  1285. }
  1286. if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
  1287. void *feature_table = NULL;
  1288. qcow2_read_extensions(bs, header.header_length, ext_end,
  1289. &feature_table, flags, NULL, NULL);
  1290. report_unsupported_feature(errp, feature_table,
  1291. s->incompatible_features &
  1292. ~QCOW2_INCOMPAT_MASK);
  1293. ret = -ENOTSUP;
  1294. g_free(feature_table);
  1295. goto fail;
  1296. }
  1297. if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
  1298. /* Corrupt images may not be written to unless they are being repaired
  1299. */
  1300. if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
  1301. error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
  1302. "read/write");
  1303. ret = -EACCES;
  1304. goto fail;
  1305. }
  1306. }
  1307. s->subclusters_per_cluster =
  1308. has_subclusters(s) ? QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER : 1;
  1309. s->subcluster_size = s->cluster_size / s->subclusters_per_cluster;
  1310. s->subcluster_bits = ctz32(s->subcluster_size);
  1311. if (s->subcluster_size < (1 << MIN_CLUSTER_BITS)) {
  1312. error_setg(errp, "Unsupported subcluster size: %d", s->subcluster_size);
  1313. ret = -EINVAL;
  1314. goto fail;
  1315. }
  1316. /* Check support for various header values */
  1317. if (header.refcount_order > 6) {
  1318. error_setg(errp, "Reference count entry width too large; may not "
  1319. "exceed 64 bits");
  1320. ret = -EINVAL;
  1321. goto fail;
  1322. }
  1323. s->refcount_order = header.refcount_order;
  1324. s->refcount_bits = 1 << s->refcount_order;
  1325. s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
  1326. s->refcount_max += s->refcount_max - 1;
  1327. s->crypt_method_header = header.crypt_method;
  1328. if (s->crypt_method_header) {
  1329. if (bdrv_uses_whitelist() &&
  1330. s->crypt_method_header == QCOW_CRYPT_AES) {
  1331. error_setg(errp,
  1332. "Use of AES-CBC encrypted qcow2 images is no longer "
  1333. "supported in system emulators");
  1334. error_append_hint(errp,
  1335. "You can use 'qemu-img convert' to convert your "
  1336. "image to an alternative supported format, such "
  1337. "as unencrypted qcow2, or raw with the LUKS "
  1338. "format instead.\n");
  1339. ret = -ENOSYS;
  1340. goto fail;
  1341. }
  1342. if (s->crypt_method_header == QCOW_CRYPT_AES) {
  1343. s->crypt_physical_offset = false;
  1344. } else {
  1345. /* Assuming LUKS and any future crypt methods we
  1346. * add will all use physical offsets, due to the
  1347. * fact that the alternative is insecure... */
  1348. s->crypt_physical_offset = true;
  1349. }
  1350. bs->encrypted = true;
  1351. }
  1352. s->l2_bits = s->cluster_bits - ctz32(l2_entry_size(s));
  1353. s->l2_size = 1 << s->l2_bits;
  1354. /* 2^(s->refcount_order - 3) is the refcount width in bytes */
  1355. s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
  1356. s->refcount_block_size = 1 << s->refcount_block_bits;
  1357. bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
  1358. s->csize_shift = (62 - (s->cluster_bits - 8));
  1359. s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
  1360. s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
  1361. s->refcount_table_offset = header.refcount_table_offset;
  1362. s->refcount_table_size =
  1363. header.refcount_table_clusters << (s->cluster_bits - 3);
  1364. if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
  1365. error_setg(errp, "Image does not contain a reference count table");
  1366. ret = -EINVAL;
  1367. goto fail;
  1368. }
  1369. ret = qcow2_validate_table(bs, s->refcount_table_offset,
  1370. header.refcount_table_clusters,
  1371. s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
  1372. "Reference count table", errp);
  1373. if (ret < 0) {
  1374. goto fail;
  1375. }
  1376. if (!(flags & BDRV_O_CHECK)) {
  1377. /*
  1378. * The total size in bytes of the snapshot table is checked in
  1379. * qcow2_read_snapshots() because the size of each snapshot is
  1380. * variable and we don't know it yet.
  1381. * Here we only check the offset and number of snapshots.
  1382. */
  1383. ret = qcow2_validate_table(bs, header.snapshots_offset,
  1384. header.nb_snapshots,
  1385. sizeof(QCowSnapshotHeader),
  1386. sizeof(QCowSnapshotHeader) *
  1387. QCOW_MAX_SNAPSHOTS,
  1388. "Snapshot table", errp);
  1389. if (ret < 0) {
  1390. goto fail;
  1391. }
  1392. }
  1393. /* read the level 1 table */
  1394. ret = qcow2_validate_table(bs, header.l1_table_offset,
  1395. header.l1_size, L1E_SIZE,
  1396. QCOW_MAX_L1_SIZE, "Active L1 table", errp);
  1397. if (ret < 0) {
  1398. goto fail;
  1399. }
  1400. s->l1_size = header.l1_size;
  1401. s->l1_table_offset = header.l1_table_offset;
  1402. l1_vm_state_index = size_to_l1(s, header.size);
  1403. if (l1_vm_state_index > INT_MAX) {
  1404. error_setg(errp, "Image is too big");
  1405. ret = -EFBIG;
  1406. goto fail;
  1407. }
  1408. s->l1_vm_state_index = l1_vm_state_index;
  1409. /* the L1 table must contain at least enough entries to put
  1410. header.size bytes */
  1411. if (s->l1_size < s->l1_vm_state_index) {
  1412. error_setg(errp, "L1 table is too small");
  1413. ret = -EINVAL;
  1414. goto fail;
  1415. }
  1416. if (s->l1_size > 0) {
  1417. s->l1_table = qemu_try_blockalign(bs->file->bs, s->l1_size * L1E_SIZE);
  1418. if (s->l1_table == NULL) {
  1419. error_setg(errp, "Could not allocate L1 table");
  1420. ret = -ENOMEM;
  1421. goto fail;
  1422. }
  1423. ret = bdrv_co_pread(bs->file, s->l1_table_offset, s->l1_size * L1E_SIZE,
  1424. s->l1_table, 0);
  1425. if (ret < 0) {
  1426. error_setg_errno(errp, -ret, "Could not read L1 table");
  1427. goto fail;
  1428. }
  1429. for(i = 0;i < s->l1_size; i++) {
  1430. s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
  1431. }
  1432. }
  1433. /* Parse driver-specific options */
  1434. ret = qcow2_update_options(bs, options, flags, errp);
  1435. if (ret < 0) {
  1436. goto fail;
  1437. }
  1438. s->flags = flags;
  1439. ret = qcow2_refcount_init(bs);
  1440. if (ret != 0) {
  1441. error_setg_errno(errp, -ret, "Could not initialize refcount handling");
  1442. goto fail;
  1443. }
  1444. QLIST_INIT(&s->cluster_allocs);
  1445. QTAILQ_INIT(&s->discards);
  1446. /* read qcow2 extensions */
  1447. if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
  1448. flags, &update_header, errp)) {
  1449. ret = -EINVAL;
  1450. goto fail;
  1451. }
  1452. if (open_data_file && (flags & BDRV_O_NO_IO)) {
  1453. /*
  1454. * Don't open the data file for 'qemu-img info' so that it can be used
  1455. * to verify that an untrusted qcow2 image doesn't refer to external
  1456. * files.
  1457. *
  1458. * Note: This still makes has_data_file() return true.
  1459. */
  1460. if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
  1461. s->data_file = NULL;
  1462. } else {
  1463. s->data_file = bs->file;
  1464. }
  1465. qdict_extract_subqdict(options, NULL, "data-file.");
  1466. qdict_del(options, "data-file");
  1467. } else if (open_data_file) {
  1468. /* Open external data file */
  1469. bdrv_graph_co_rdunlock();
  1470. s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs,
  1471. &child_of_bds, BDRV_CHILD_DATA,
  1472. true, errp);
  1473. bdrv_graph_co_rdlock();
  1474. if (*errp) {
  1475. ret = -EINVAL;
  1476. goto fail;
  1477. }
  1478. if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
  1479. if (!s->data_file && s->image_data_file) {
  1480. bdrv_graph_co_rdunlock();
  1481. s->data_file = bdrv_co_open_child(s->image_data_file, options,
  1482. "data-file", bs,
  1483. &child_of_bds,
  1484. BDRV_CHILD_DATA, false, errp);
  1485. bdrv_graph_co_rdlock();
  1486. if (!s->data_file) {
  1487. ret = -EINVAL;
  1488. goto fail;
  1489. }
  1490. }
  1491. if (!s->data_file) {
  1492. error_setg(errp, "'data-file' is required for this image");
  1493. ret = -EINVAL;
  1494. goto fail;
  1495. }
  1496. /* No data here */
  1497. bs->file->role &= ~BDRV_CHILD_DATA;
  1498. /* Must succeed because we have given up permissions if anything */
  1499. bdrv_child_refresh_perms(bs, bs->file, &error_abort);
  1500. } else {
  1501. if (s->data_file) {
  1502. error_setg(errp, "'data-file' can only be set for images with "
  1503. "an external data file");
  1504. ret = -EINVAL;
  1505. goto fail;
  1506. }
  1507. s->data_file = bs->file;
  1508. if (data_file_is_raw(bs)) {
  1509. error_setg(errp, "data-file-raw requires a data file");
  1510. ret = -EINVAL;
  1511. goto fail;
  1512. }
  1513. }
  1514. }
  1515. /* qcow2_read_extension may have set up the crypto context
  1516. * if the crypt method needs a header region, some methods
  1517. * don't need header extensions, so must check here
  1518. */
  1519. if (s->crypt_method_header && !s->crypto) {
  1520. if (s->crypt_method_header == QCOW_CRYPT_AES) {
  1521. unsigned int cflags = 0;
  1522. if (flags & BDRV_O_NO_IO) {
  1523. cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
  1524. }
  1525. s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
  1526. NULL, NULL, cflags, errp);
  1527. if (!s->crypto) {
  1528. ret = -EINVAL;
  1529. goto fail;
  1530. }
  1531. } else if (!(flags & BDRV_O_NO_IO)) {
  1532. error_setg(errp, "Missing CRYPTO header for crypt method %d",
  1533. s->crypt_method_header);
  1534. ret = -EINVAL;
  1535. goto fail;
  1536. }
  1537. }
  1538. /* read the backing file name */
  1539. if (header.backing_file_offset != 0) {
  1540. len = header.backing_file_size;
  1541. if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
  1542. len >= sizeof(bs->backing_file)) {
  1543. error_setg(errp, "Backing file name too long");
  1544. ret = -EINVAL;
  1545. goto fail;
  1546. }
  1547. s->image_backing_file = g_malloc(len + 1);
  1548. ret = bdrv_co_pread(bs->file, header.backing_file_offset, len,
  1549. s->image_backing_file, 0);
  1550. if (ret < 0) {
  1551. error_setg_errno(errp, -ret, "Could not read backing file name");
  1552. goto fail;
  1553. }
  1554. s->image_backing_file[len] = '\0';
  1555. /*
  1556. * Update only when something has changed. This function is called by
  1557. * qcow2_co_invalidate_cache(), and we do not want to reset
  1558. * auto_backing_file unless necessary.
  1559. */
  1560. if (!g_str_equal(s->image_backing_file, bs->backing_file)) {
  1561. pstrcpy(bs->backing_file, sizeof(bs->backing_file),
  1562. s->image_backing_file);
  1563. pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
  1564. s->image_backing_file);
  1565. }
  1566. }
  1567. /*
  1568. * Internal snapshots; skip reading them in check mode, because
  1569. * we do not need them then, and we do not want to abort because
  1570. * of a broken table.
  1571. */
  1572. if (!(flags & BDRV_O_CHECK)) {
  1573. s->snapshots_offset = header.snapshots_offset;
  1574. s->nb_snapshots = header.nb_snapshots;
  1575. ret = qcow2_read_snapshots(bs, errp);
  1576. if (ret < 0) {
  1577. goto fail;
  1578. }
  1579. }
  1580. /* Clear unknown autoclear feature bits */
  1581. update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
  1582. update_header = update_header && bdrv_is_writable(bs);
  1583. if (update_header) {
  1584. s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
  1585. }
  1586. /* == Handle persistent dirty bitmaps ==
  1587. *
  1588. * We want load dirty bitmaps in three cases:
  1589. *
  1590. * 1. Normal open of the disk in active mode, not related to invalidation
  1591. * after migration.
  1592. *
  1593. * 2. Invalidation of the target vm after pre-copy phase of migration, if
  1594. * bitmaps are _not_ migrating through migration channel, i.e.
  1595. * 'dirty-bitmaps' capability is disabled.
  1596. *
  1597. * 3. Invalidation of source vm after failed or canceled migration.
  1598. * This is a very interesting case. There are two possible types of
  1599. * bitmaps:
  1600. *
  1601. * A. Stored on inactivation and removed. They should be loaded from the
  1602. * image.
  1603. *
  1604. * B. Not stored: not-persistent bitmaps and bitmaps, migrated through
  1605. * the migration channel (with dirty-bitmaps capability).
  1606. *
  1607. * On the other hand, there are two possible sub-cases:
  1608. *
  1609. * 3.1 disk was changed by somebody else while were inactive. In this
  1610. * case all in-RAM dirty bitmaps (both persistent and not) are
  1611. * definitely invalid. And we don't have any method to determine
  1612. * this.
  1613. *
  1614. * Simple and safe thing is to just drop all the bitmaps of type B on
  1615. * inactivation. But in this case we lose bitmaps in valid 4.2 case.
  1616. *
  1617. * On the other hand, resuming source vm, if disk was already changed
  1618. * is a bad thing anyway: not only bitmaps, the whole vm state is
  1619. * out of sync with disk.
  1620. *
  1621. * This means, that user or management tool, who for some reason
  1622. * decided to resume source vm, after disk was already changed by
  1623. * target vm, should at least drop all dirty bitmaps by hand.
  1624. *
  1625. * So, we can ignore this case for now, but TODO: "generation"
  1626. * extension for qcow2, to determine, that image was changed after
  1627. * last inactivation. And if it is changed, we will drop (or at least
  1628. * mark as 'invalid' all the bitmaps of type B, both persistent
  1629. * and not).
  1630. *
  1631. * 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved
  1632. * to disk ('dirty-bitmaps' capability disabled), or not saved
  1633. * ('dirty-bitmaps' capability enabled), but we don't need to care
  1634. * of: let's load bitmaps as always: stored bitmaps will be loaded,
  1635. * and not stored has flag IN_USE=1 in the image and will be skipped
  1636. * on loading.
  1637. *
  1638. * One remaining possible case when we don't want load bitmaps:
  1639. *
  1640. * 4. Open disk in inactive mode in target vm (bitmaps are migrating or
  1641. * will be loaded on invalidation, no needs try loading them before)
  1642. */
  1643. if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
  1644. /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
  1645. bool header_updated;
  1646. if (!qcow2_load_dirty_bitmaps(bs, &header_updated, errp)) {
  1647. ret = -EINVAL;
  1648. goto fail;
  1649. }
  1650. update_header = update_header && !header_updated;
  1651. }
  1652. if (update_header) {
  1653. ret = qcow2_update_header(bs);
  1654. if (ret < 0) {
  1655. error_setg_errno(errp, -ret, "Could not update qcow2 header");
  1656. goto fail;
  1657. }
  1658. }
  1659. bs->supported_zero_flags = header.version >= 3 ?
  1660. BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
  1661. bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
  1662. /* Repair image if dirty */
  1663. if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
  1664. (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
  1665. BdrvCheckResult result = {0};
  1666. ret = qcow2_co_check_locked(bs, &result,
  1667. BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
  1668. if (ret < 0 || result.check_errors) {
  1669. if (ret >= 0) {
  1670. ret = -EIO;
  1671. }
  1672. error_setg_errno(errp, -ret, "Could not repair dirty image");
  1673. goto fail;
  1674. }
  1675. }
  1676. #ifdef DEBUG_ALLOC
  1677. {
  1678. BdrvCheckResult result = {0};
  1679. qcow2_check_refcounts(bs, &result, 0);
  1680. }
  1681. #endif
  1682. qemu_co_queue_init(&s->thread_task_queue);
  1683. return ret;
  1684. fail:
  1685. g_free(s->image_data_file);
  1686. if (open_data_file && has_data_file(bs)) {
  1687. bdrv_graph_co_rdunlock();
  1688. bdrv_co_unref_child(bs, s->data_file);
  1689. bdrv_graph_co_rdlock();
  1690. s->data_file = NULL;
  1691. }
  1692. g_free(s->unknown_header_fields);
  1693. cleanup_unknown_header_ext(bs);
  1694. qcow2_free_snapshots(bs);
  1695. qcow2_refcount_close(bs);
  1696. qemu_vfree(s->l1_table);
  1697. /* else pre-write overlap checks in cache_destroy may crash */
  1698. s->l1_table = NULL;
  1699. cache_clean_timer_del(bs);
  1700. if (s->l2_table_cache) {
  1701. qcow2_cache_destroy(s->l2_table_cache);
  1702. }
  1703. if (s->refcount_block_cache) {
  1704. qcow2_cache_destroy(s->refcount_block_cache);
  1705. }
  1706. qcrypto_block_free(s->crypto);
  1707. qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
  1708. return ret;
  1709. }
/*
 * Argument/result bundle shared between qcow2_open() and the
 * qcow2_open_entry() coroutine it starts.
 */
typedef struct QCow2OpenCo {
    BlockDriverState *bs;   /* node being opened */
    QDict *options;         /* forwarded to qcow2_do_open() */
    int flags;              /* forwarded to qcow2_do_open() */
    Error **errp;           /* forwarded to qcow2_do_open() */
    int ret;                /* -EINPROGRESS until the coroutine stores its result */
} QCow2OpenCo;
  1717. static void coroutine_fn qcow2_open_entry(void *opaque)
  1718. {
  1719. QCow2OpenCo *qoc = opaque;
  1720. BDRVQcow2State *s = qoc->bs->opaque;
  1721. GRAPH_RDLOCK_GUARD();
  1722. qemu_co_mutex_lock(&s->lock);
  1723. qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
  1724. qoc->errp);
  1725. qemu_co_mutex_unlock(&s->lock);
  1726. aio_wait_kick();
  1727. }
  1728. static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
  1729. Error **errp)
  1730. {
  1731. BDRVQcow2State *s = bs->opaque;
  1732. QCow2OpenCo qoc = {
  1733. .bs = bs,
  1734. .options = options,
  1735. .flags = flags,
  1736. .errp = errp,
  1737. .ret = -EINPROGRESS
  1738. };
  1739. int ret;
  1740. ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
  1741. if (ret < 0) {
  1742. return ret;
  1743. }
  1744. /* Initialise locks */
  1745. qemu_co_mutex_init(&s->lock);
  1746. assert(!qemu_in_coroutine());
  1747. assert(qemu_get_current_aio_context() == qemu_get_aio_context());
  1748. aio_co_enter(bdrv_get_aio_context(bs),
  1749. qemu_coroutine_create(qcow2_open_entry, &qoc));
  1750. AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);
  1751. return qoc.ret;
  1752. }
  1753. static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
  1754. {
  1755. BDRVQcow2State *s = bs->opaque;
  1756. if (bs->encrypted) {
  1757. /* Encryption works on a sector granularity */
  1758. bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
  1759. }
  1760. bs->bl.pwrite_zeroes_alignment = s->subcluster_size;
  1761. bs->bl.pdiscard_alignment = s->cluster_size;
  1762. }
  1763. static int GRAPH_UNLOCKED
  1764. qcow2_reopen_prepare(BDRVReopenState *state,BlockReopenQueue *queue,
  1765. Error **errp)
  1766. {
  1767. BDRVQcow2State *s = state->bs->opaque;
  1768. Qcow2ReopenState *r;
  1769. int ret;
  1770. GLOBAL_STATE_CODE();
  1771. GRAPH_RDLOCK_GUARD_MAINLOOP();
  1772. r = g_new0(Qcow2ReopenState, 1);
  1773. state->opaque = r;
  1774. ret = qcow2_update_options_prepare(state->bs, r, state->options,
  1775. state->flags, errp);
  1776. if (ret < 0) {
  1777. goto fail;
  1778. }
  1779. /* We need to write out any unwritten data if we reopen read-only. */
  1780. if ((state->flags & BDRV_O_RDWR) == 0) {
  1781. ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
  1782. if (ret < 0) {
  1783. goto fail;
  1784. }
  1785. ret = bdrv_flush(state->bs);
  1786. if (ret < 0) {
  1787. goto fail;
  1788. }
  1789. ret = qcow2_mark_clean(state->bs);
  1790. if (ret < 0) {
  1791. goto fail;
  1792. }
  1793. }
  1794. /*
  1795. * Without an external data file, s->data_file points to the same BdrvChild
  1796. * as bs->file. It needs to be resynced after reopen because bs->file may
  1797. * be changed. We can't use it in the meantime.
  1798. */
  1799. if (!has_data_file(state->bs)) {
  1800. assert(s->data_file == state->bs->file);
  1801. s->data_file = NULL;
  1802. }
  1803. return 0;
  1804. fail:
  1805. qcow2_update_options_abort(state->bs, r);
  1806. g_free(r);
  1807. return ret;
  1808. }
  1809. static void qcow2_reopen_commit(BDRVReopenState *state)
  1810. {
  1811. BDRVQcow2State *s = state->bs->opaque;
  1812. GRAPH_RDLOCK_GUARD_MAINLOOP();
  1813. qcow2_update_options_commit(state->bs, state->opaque);
  1814. if (!s->data_file) {
  1815. /*
  1816. * If we don't have an external data file, s->data_file was cleared by
  1817. * qcow2_reopen_prepare() and needs to be updated.
  1818. */
  1819. s->data_file = state->bs->file;
  1820. }
  1821. g_free(state->opaque);
  1822. }
  1823. static void qcow2_reopen_commit_post(BDRVReopenState *state)
  1824. {
  1825. GRAPH_RDLOCK_GUARD_MAINLOOP();
  1826. if (state->flags & BDRV_O_RDWR) {
  1827. Error *local_err = NULL;
  1828. if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
  1829. /*
  1830. * This is not fatal, bitmaps just left read-only, so all following
  1831. * writes will fail. User can remove read-only bitmaps to unblock
  1832. * writes or retry reopen.
  1833. */
  1834. error_reportf_err(local_err,
  1835. "%s: Failed to make dirty bitmaps writable: ",
  1836. bdrv_get_node_name(state->bs));
  1837. }
  1838. }
  1839. }
  1840. static void qcow2_reopen_abort(BDRVReopenState *state)
  1841. {
  1842. BDRVQcow2State *s = state->bs->opaque;
  1843. GRAPH_RDLOCK_GUARD_MAINLOOP();
  1844. if (!s->data_file) {
  1845. /*
  1846. * If we don't have an external data file, s->data_file was cleared by
  1847. * qcow2_reopen_prepare() and needs to be restored.
  1848. */
  1849. s->data_file = state->bs->file;
  1850. }
  1851. qcow2_update_options_abort(state->bs, state->opaque);
  1852. g_free(state->opaque);
  1853. }
  1854. static void qcow2_join_options(QDict *options, QDict *old_options)
  1855. {
  1856. bool has_new_overlap_template =
  1857. qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
  1858. qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
  1859. bool has_new_total_cache_size =
  1860. qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
  1861. bool has_all_cache_options;
  1862. /* New overlap template overrides all old overlap options */
  1863. if (has_new_overlap_template) {
  1864. qdict_del(old_options, QCOW2_OPT_OVERLAP);
  1865. qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
  1866. qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
  1867. qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
  1868. qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
  1869. qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
  1870. qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
  1871. qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
  1872. qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
  1873. qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
  1874. }
  1875. /* New total cache size overrides all old options */
  1876. if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
  1877. qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
  1878. qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
  1879. }
  1880. qdict_join(options, old_options, false);
  1881. /*
  1882. * If after merging all cache size options are set, an old total size is
  1883. * overwritten. Do keep all options, however, if all three are new. The
  1884. * resulting error message is what we want to happen.
  1885. */
  1886. has_all_cache_options =
  1887. qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
  1888. qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
  1889. qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
  1890. if (has_all_cache_options && !has_new_total_cache_size) {
  1891. qdict_del(options, QCOW2_OPT_CACHE_SIZE);
  1892. }
  1893. }
  1894. static int coroutine_fn GRAPH_RDLOCK
  1895. qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
  1896. int64_t count, int64_t *pnum, int64_t *map,
  1897. BlockDriverState **file)
  1898. {
  1899. BDRVQcow2State *s = bs->opaque;
  1900. uint64_t host_offset;
  1901. unsigned int bytes;
  1902. QCow2SubclusterType type;
  1903. int ret, status = 0;
  1904. qemu_co_mutex_lock(&s->lock);
  1905. if (!s->metadata_preallocation_checked) {
  1906. ret = qcow2_detect_metadata_preallocation(bs);
  1907. s->metadata_preallocation = (ret == 1);
  1908. s->metadata_preallocation_checked = true;
  1909. }
  1910. bytes = MIN(INT_MAX, count);
  1911. ret = qcow2_get_host_offset(bs, offset, &bytes, &host_offset, &type);
  1912. qemu_co_mutex_unlock(&s->lock);
  1913. if (ret < 0) {
  1914. return ret;
  1915. }
  1916. *pnum = bytes;
  1917. if ((type == QCOW2_SUBCLUSTER_NORMAL ||
  1918. type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
  1919. type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) && !s->crypto) {
  1920. *map = host_offset;
  1921. *file = s->data_file->bs;
  1922. status |= BDRV_BLOCK_OFFSET_VALID;
  1923. }
  1924. if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
  1925. type == QCOW2_SUBCLUSTER_ZERO_ALLOC) {
  1926. status |= BDRV_BLOCK_ZERO;
  1927. } else if (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN &&
  1928. type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC) {
  1929. status |= BDRV_BLOCK_DATA;
  1930. }
  1931. if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
  1932. (status & BDRV_BLOCK_OFFSET_VALID))
  1933. {
  1934. status |= BDRV_BLOCK_RECURSE;
  1935. }
  1936. if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
  1937. status |= BDRV_BLOCK_COMPRESSED;
  1938. }
  1939. return status;
  1940. }
  1941. static int coroutine_fn GRAPH_RDLOCK
  1942. qcow2_handle_l2meta(BlockDriverState *bs, QCowL2Meta **pl2meta, bool link_l2)
  1943. {
  1944. int ret = 0;
  1945. QCowL2Meta *l2meta = *pl2meta;
  1946. while (l2meta != NULL) {
  1947. QCowL2Meta *next;
  1948. if (link_l2) {
  1949. ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
  1950. if (ret) {
  1951. goto out;
  1952. }
  1953. } else {
  1954. qcow2_alloc_cluster_abort(bs, l2meta);
  1955. }
  1956. /* Take the request off the list of running requests */
  1957. QLIST_REMOVE(l2meta, next_in_flight);
  1958. qemu_co_queue_restart_all(&l2meta->dependent_requests);
  1959. next = l2meta->next;
  1960. g_free(l2meta);
  1961. l2meta = next;
  1962. }
  1963. out:
  1964. *pl2meta = l2meta;
  1965. return ret;
  1966. }
  1967. static int coroutine_fn GRAPH_RDLOCK
  1968. qcow2_co_preadv_encrypted(BlockDriverState *bs,
  1969. uint64_t host_offset,
  1970. uint64_t offset,
  1971. uint64_t bytes,
  1972. QEMUIOVector *qiov,
  1973. uint64_t qiov_offset)
  1974. {
  1975. int ret;
  1976. BDRVQcow2State *s = bs->opaque;
  1977. uint8_t *buf;
  1978. assert(bs->encrypted && s->crypto);
  1979. assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
  1980. /*
  1981. * For encrypted images, read everything into a temporary
  1982. * contiguous buffer on which the AES functions can work.
  1983. * Also, decryption in a separate buffer is better as it
  1984. * prevents the guest from learning information about the
  1985. * encrypted nature of the virtual disk.
  1986. */
  1987. buf = qemu_try_blockalign(s->data_file->bs, bytes);
  1988. if (buf == NULL) {
  1989. return -ENOMEM;
  1990. }
  1991. BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
  1992. ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0);
  1993. if (ret < 0) {
  1994. goto fail;
  1995. }
  1996. if (qcow2_co_decrypt(bs, host_offset, offset, buf, bytes) < 0)
  1997. {
  1998. ret = -EIO;
  1999. goto fail;
  2000. }
  2001. qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
  2002. fail:
  2003. qemu_vfree(buf);
  2004. return ret;
  2005. }
  2006. typedef struct Qcow2AioTask {
  2007. AioTask task;
  2008. BlockDriverState *bs;
  2009. QCow2SubclusterType subcluster_type; /* only for read */
  2010. uint64_t host_offset; /* or l2_entry for compressed read */
  2011. uint64_t offset;
  2012. uint64_t bytes;
  2013. QEMUIOVector *qiov;
  2014. uint64_t qiov_offset;
  2015. QCowL2Meta *l2meta; /* only for write */
  2016. } Qcow2AioTask;
  2017. static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
  2018. static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
  2019. AioTaskPool *pool,
  2020. AioTaskFunc func,
  2021. QCow2SubclusterType subcluster_type,
  2022. uint64_t host_offset,
  2023. uint64_t offset,
  2024. uint64_t bytes,
  2025. QEMUIOVector *qiov,
  2026. size_t qiov_offset,
  2027. QCowL2Meta *l2meta)
  2028. {
  2029. Qcow2AioTask local_task;
  2030. Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
  2031. *task = (Qcow2AioTask) {
  2032. .task.func = func,
  2033. .bs = bs,
  2034. .subcluster_type = subcluster_type,
  2035. .qiov = qiov,
  2036. .host_offset = host_offset,
  2037. .offset = offset,
  2038. .bytes = bytes,
  2039. .qiov_offset = qiov_offset,
  2040. .l2meta = l2meta,
  2041. };
  2042. trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
  2043. func == qcow2_co_preadv_task_entry ? "read" : "write",
  2044. subcluster_type, host_offset, offset, bytes,
  2045. qiov, qiov_offset);
  2046. if (!pool) {
  2047. return func(&task->task);
  2048. }
  2049. aio_task_pool_start_task(pool, &task->task);
  2050. return 0;
  2051. }
  2052. static int coroutine_fn GRAPH_RDLOCK
  2053. qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
  2054. uint64_t host_offset, uint64_t offset, uint64_t bytes,
  2055. QEMUIOVector *qiov, size_t qiov_offset)
  2056. {
  2057. BDRVQcow2State *s = bs->opaque;
  2058. switch (subc_type) {
  2059. case QCOW2_SUBCLUSTER_ZERO_PLAIN:
  2060. case QCOW2_SUBCLUSTER_ZERO_ALLOC:
  2061. /* Both zero types are handled in qcow2_co_preadv_part */
  2062. g_assert_not_reached();
  2063. case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
  2064. case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
  2065. assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
  2066. BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
  2067. return bdrv_co_preadv_part(bs->backing, offset, bytes,
  2068. qiov, qiov_offset, 0);
  2069. case QCOW2_SUBCLUSTER_COMPRESSED:
  2070. return qcow2_co_preadv_compressed(bs, host_offset,
  2071. offset, bytes, qiov, qiov_offset);
  2072. case QCOW2_SUBCLUSTER_NORMAL:
  2073. if (bs->encrypted) {
  2074. return qcow2_co_preadv_encrypted(bs, host_offset,
  2075. offset, bytes, qiov, qiov_offset);
  2076. }
  2077. BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
  2078. return bdrv_co_preadv_part(s->data_file, host_offset,
  2079. bytes, qiov, qiov_offset, 0);
  2080. default:
  2081. g_assert_not_reached();
  2082. }
  2083. g_assert_not_reached();
  2084. }
  2085. /*
  2086. * This function can count as GRAPH_RDLOCK because qcow2_co_preadv_part() holds
  2087. * the graph lock and keeps it until this coroutine has terminated.
  2088. */
  2089. static int coroutine_fn GRAPH_RDLOCK qcow2_co_preadv_task_entry(AioTask *task)
  2090. {
  2091. Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
  2092. assert(!t->l2meta);
  2093. return qcow2_co_preadv_task(t->bs, t->subcluster_type,
  2094. t->host_offset, t->offset, t->bytes,
  2095. t->qiov, t->qiov_offset);
  2096. }
  2097. static int coroutine_fn GRAPH_RDLOCK
  2098. qcow2_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
  2099. QEMUIOVector *qiov, size_t qiov_offset,
  2100. BdrvRequestFlags flags)
  2101. {
  2102. BDRVQcow2State *s = bs->opaque;
  2103. int ret = 0;
  2104. unsigned int cur_bytes; /* number of bytes in current iteration */
  2105. uint64_t host_offset = 0;
  2106. QCow2SubclusterType type;
  2107. AioTaskPool *aio = NULL;
  2108. while (bytes != 0 && aio_task_pool_status(aio) == 0) {
  2109. /* prepare next request */
  2110. cur_bytes = MIN(bytes, INT_MAX);
  2111. if (s->crypto) {
  2112. cur_bytes = MIN(cur_bytes,
  2113. QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
  2114. }
  2115. qemu_co_mutex_lock(&s->lock);
  2116. ret = qcow2_get_host_offset(bs, offset, &cur_bytes,
  2117. &host_offset, &type);
  2118. qemu_co_mutex_unlock(&s->lock);
  2119. if (ret < 0) {
  2120. goto out;
  2121. }
  2122. if (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
  2123. type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
  2124. (type == QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN && !bs->backing) ||
  2125. (type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC && !bs->backing))
  2126. {
  2127. qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
  2128. } else {
  2129. if (!aio && cur_bytes != bytes) {
  2130. aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
  2131. }
  2132. ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, type,
  2133. host_offset, offset, cur_bytes,
  2134. qiov, qiov_offset, NULL);
  2135. if (ret < 0) {
  2136. goto out;
  2137. }
  2138. }
  2139. bytes -= cur_bytes;
  2140. offset += cur_bytes;
  2141. qiov_offset += cur_bytes;
  2142. }
  2143. out:
  2144. if (aio) {
  2145. aio_task_pool_wait_all(aio);
  2146. if (ret == 0) {
  2147. ret = aio_task_pool_status(aio);
  2148. }
  2149. g_free(aio);
  2150. }
  2151. return ret;
  2152. }
  2153. /* Check if it's possible to merge a write request with the writing of
  2154. * the data from the COW regions */
  2155. static bool merge_cow(uint64_t offset, unsigned bytes,
  2156. QEMUIOVector *qiov, size_t qiov_offset,
  2157. QCowL2Meta *l2meta)
  2158. {
  2159. QCowL2Meta *m;
  2160. for (m = l2meta; m != NULL; m = m->next) {
  2161. /* If both COW regions are empty then there's nothing to merge */
  2162. if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
  2163. continue;
  2164. }
  2165. /* If COW regions are handled already, skip this too */
  2166. if (m->skip_cow) {
  2167. continue;
  2168. }
  2169. /*
  2170. * The write request should start immediately after the first
  2171. * COW region. This does not always happen because the area
  2172. * touched by the request can be larger than the one defined
  2173. * by @m (a single request can span an area consisting of a
  2174. * mix of previously unallocated and allocated clusters, that
  2175. * is why @l2meta is a list).
  2176. */
  2177. if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
  2178. /* In this case the request starts before this region */
  2179. assert(offset < l2meta_cow_start(m));
  2180. assert(m->cow_start.nb_bytes == 0);
  2181. continue;
  2182. }
  2183. /* The write request should end immediately before the second
  2184. * COW region (see above for why it does not always happen) */
  2185. if (m->offset + m->cow_end.offset != offset + bytes) {
  2186. assert(offset + bytes > m->offset + m->cow_end.offset);
  2187. assert(m->cow_end.nb_bytes == 0);
  2188. continue;
  2189. }
  2190. /* Make sure that adding both COW regions to the QEMUIOVector
  2191. * does not exceed IOV_MAX */
  2192. if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
  2193. continue;
  2194. }
  2195. m->data_qiov = qiov;
  2196. m->data_qiov_offset = qiov_offset;
  2197. return true;
  2198. }
  2199. return false;
  2200. }
  2201. /*
  2202. * Return 1 if the COW regions read as zeroes, 0 if not, < 0 on error.
  2203. * Note that returning 0 does not guarantee non-zero data.
  2204. */
  2205. static int coroutine_fn GRAPH_RDLOCK
  2206. is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
  2207. {
  2208. /*
  2209. * This check is designed for optimization shortcut so it must be
  2210. * efficient.
  2211. * Instead of is_zero(), use bdrv_co_is_zero_fast() as it is
  2212. * faster (but not as accurate and can result in false negatives).
  2213. */
  2214. int ret = bdrv_co_is_zero_fast(bs, m->offset + m->cow_start.offset,
  2215. m->cow_start.nb_bytes);
  2216. if (ret <= 0) {
  2217. return ret;
  2218. }
  2219. return bdrv_co_is_zero_fast(bs, m->offset + m->cow_end.offset,
  2220. m->cow_end.nb_bytes);
  2221. }
  2222. static int coroutine_fn GRAPH_RDLOCK
  2223. handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
  2224. {
  2225. BDRVQcow2State *s = bs->opaque;
  2226. QCowL2Meta *m;
  2227. if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
  2228. return 0;
  2229. }
  2230. if (bs->encrypted) {
  2231. return 0;
  2232. }
  2233. for (m = l2meta; m != NULL; m = m->next) {
  2234. int ret;
  2235. uint64_t start_offset = m->alloc_offset + m->cow_start.offset;
  2236. unsigned nb_bytes = m->cow_end.offset + m->cow_end.nb_bytes -
  2237. m->cow_start.offset;
  2238. if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
  2239. continue;
  2240. }
  2241. ret = is_zero_cow(bs, m);
  2242. if (ret < 0) {
  2243. return ret;
  2244. } else if (ret == 0) {
  2245. continue;
  2246. }
  2247. /*
  2248. * instead of writing zero COW buffers,
  2249. * efficiently zero out the whole clusters
  2250. */
  2251. ret = qcow2_pre_write_overlap_check(bs, 0, start_offset, nb_bytes,
  2252. true);
  2253. if (ret < 0) {
  2254. return ret;
  2255. }
  2256. BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
  2257. ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes,
  2258. BDRV_REQ_NO_FALLBACK);
  2259. if (ret < 0) {
  2260. if (ret != -ENOTSUP && ret != -EAGAIN) {
  2261. return ret;
  2262. }
  2263. continue;
  2264. }
  2265. trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
  2266. m->skip_cow = true;
  2267. }
  2268. return 0;
  2269. }
  2270. /*
  2271. * qcow2_co_pwritev_task
  2272. * Called with s->lock unlocked
  2273. * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
  2274. * not use it somehow after qcow2_co_pwritev_task() call
  2275. */
  2276. static coroutine_fn GRAPH_RDLOCK
  2277. int qcow2_co_pwritev_task(BlockDriverState *bs, uint64_t host_offset,
  2278. uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
  2279. uint64_t qiov_offset, QCowL2Meta *l2meta)
  2280. {
  2281. int ret;
  2282. BDRVQcow2State *s = bs->opaque;
  2283. void *crypt_buf = NULL;
  2284. QEMUIOVector encrypted_qiov;
  2285. if (bs->encrypted) {
  2286. assert(s->crypto);
  2287. assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
  2288. crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
  2289. if (crypt_buf == NULL) {
  2290. ret = -ENOMEM;
  2291. goto out_unlocked;
  2292. }
  2293. qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
  2294. if (qcow2_co_encrypt(bs, host_offset, offset, crypt_buf, bytes) < 0) {
  2295. ret = -EIO;
  2296. goto out_unlocked;
  2297. }
  2298. qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
  2299. qiov = &encrypted_qiov;
  2300. qiov_offset = 0;
  2301. }
  2302. /* Try to efficiently initialize the physical space with zeroes */
  2303. ret = handle_alloc_space(bs, l2meta);
  2304. if (ret < 0) {
  2305. goto out_unlocked;
  2306. }
  2307. /*
  2308. * If we need to do COW, check if it's possible to merge the
  2309. * writing of the guest data together with that of the COW regions.
  2310. * If it's not possible (or not necessary) then write the
  2311. * guest data now.
  2312. */
  2313. if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
  2314. BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
  2315. trace_qcow2_writev_data(qemu_coroutine_self(), host_offset);
  2316. ret = bdrv_co_pwritev_part(s->data_file, host_offset,
  2317. bytes, qiov, qiov_offset, 0);
  2318. if (ret < 0) {
  2319. goto out_unlocked;
  2320. }
  2321. }
  2322. qemu_co_mutex_lock(&s->lock);
  2323. ret = qcow2_handle_l2meta(bs, &l2meta, true);
  2324. goto out_locked;
  2325. out_unlocked:
  2326. qemu_co_mutex_lock(&s->lock);
  2327. out_locked:
  2328. qcow2_handle_l2meta(bs, &l2meta, false);
  2329. qemu_co_mutex_unlock(&s->lock);
  2330. qemu_vfree(crypt_buf);
  2331. return ret;
  2332. }
  2333. /*
  2334. * This function can count as GRAPH_RDLOCK because qcow2_co_pwritev_part() holds
  2335. * the graph lock and keeps it until this coroutine has terminated.
  2336. */
  2337. static coroutine_fn GRAPH_RDLOCK int qcow2_co_pwritev_task_entry(AioTask *task)
  2338. {
  2339. Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
  2340. assert(!t->subcluster_type);
  2341. return qcow2_co_pwritev_task(t->bs, t->host_offset,
  2342. t->offset, t->bytes, t->qiov, t->qiov_offset,
  2343. t->l2meta);
  2344. }
  2345. static int coroutine_fn GRAPH_RDLOCK
  2346. qcow2_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
  2347. QEMUIOVector *qiov, size_t qiov_offset,
  2348. BdrvRequestFlags flags)
  2349. {
  2350. BDRVQcow2State *s = bs->opaque;
  2351. int offset_in_cluster;
  2352. int ret;
  2353. unsigned int cur_bytes; /* number of sectors in current iteration */
  2354. uint64_t host_offset;
  2355. QCowL2Meta *l2meta = NULL;
  2356. AioTaskPool *aio = NULL;
  2357. trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
  2358. while (bytes != 0 && aio_task_pool_status(aio) == 0) {
  2359. l2meta = NULL;
  2360. trace_qcow2_writev_start_part(qemu_coroutine_self());
  2361. offset_in_cluster = offset_into_cluster(s, offset);
  2362. cur_bytes = MIN(bytes, INT_MAX);
  2363. if (bs->encrypted) {
  2364. cur_bytes = MIN(cur_bytes,
  2365. QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
  2366. - offset_in_cluster);
  2367. }
  2368. qemu_co_mutex_lock(&s->lock);
  2369. ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes,
  2370. &host_offset, &l2meta);
  2371. if (ret < 0) {
  2372. goto out_locked;
  2373. }
  2374. ret = qcow2_pre_write_overlap_check(bs, 0, host_offset,
  2375. cur_bytes, true);
  2376. if (ret < 0) {
  2377. goto out_locked;
  2378. }
  2379. qemu_co_mutex_unlock(&s->lock);
  2380. if (!aio && cur_bytes != bytes) {
  2381. aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
  2382. }
  2383. ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
  2384. host_offset, offset,
  2385. cur_bytes, qiov, qiov_offset, l2meta);
  2386. l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
  2387. if (ret < 0) {
  2388. goto fail_nometa;
  2389. }
  2390. bytes -= cur_bytes;
  2391. offset += cur_bytes;
  2392. qiov_offset += cur_bytes;
  2393. trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
  2394. }
  2395. ret = 0;
  2396. qemu_co_mutex_lock(&s->lock);
  2397. out_locked:
  2398. qcow2_handle_l2meta(bs, &l2meta, false);
  2399. qemu_co_mutex_unlock(&s->lock);
  2400. fail_nometa:
  2401. if (aio) {
  2402. aio_task_pool_wait_all(aio);
  2403. if (ret == 0) {
  2404. ret = aio_task_pool_status(aio);
  2405. }
  2406. g_free(aio);
  2407. }
  2408. trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
  2409. return ret;
  2410. }
  2411. static int GRAPH_RDLOCK qcow2_inactivate(BlockDriverState *bs)
  2412. {
  2413. BDRVQcow2State *s = bs->opaque;
  2414. int ret, result = 0;
  2415. Error *local_err = NULL;
  2416. qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
  2417. if (local_err != NULL) {
  2418. result = -EINVAL;
  2419. error_reportf_err(local_err, "Lost persistent bitmaps during "
  2420. "inactivation of node '%s': ",
  2421. bdrv_get_device_or_node_name(bs));
  2422. }
  2423. ret = qcow2_cache_flush(bs, s->l2_table_cache);
  2424. if (ret) {
  2425. result = ret;
  2426. error_report("Failed to flush the L2 table cache: %s",
  2427. strerror(-ret));
  2428. }
  2429. ret = qcow2_cache_flush(bs, s->refcount_block_cache);
  2430. if (ret) {
  2431. result = ret;
  2432. error_report("Failed to flush the refcount block cache: %s",
  2433. strerror(-ret));
  2434. }
  2435. if (result == 0) {
  2436. qcow2_mark_clean(bs);
  2437. }
  2438. return result;
  2439. }
  2440. static void coroutine_mixed_fn GRAPH_RDLOCK
  2441. qcow2_do_close(BlockDriverState *bs, bool close_data_file)
  2442. {
  2443. BDRVQcow2State *s = bs->opaque;
  2444. qemu_vfree(s->l1_table);
  2445. /* else pre-write overlap checks in cache_destroy may crash */
  2446. s->l1_table = NULL;
  2447. if (!(s->flags & BDRV_O_INACTIVE)) {
  2448. qcow2_inactivate(bs);
  2449. }
  2450. cache_clean_timer_del(bs);
  2451. qcow2_cache_destroy(s->l2_table_cache);
  2452. qcow2_cache_destroy(s->refcount_block_cache);
  2453. qcrypto_block_free(s->crypto);
  2454. s->crypto = NULL;
  2455. qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
  2456. g_free(s->unknown_header_fields);
  2457. cleanup_unknown_header_ext(bs);
  2458. g_free(s->image_data_file);
  2459. g_free(s->image_backing_file);
  2460. g_free(s->image_backing_format);
  2461. if (close_data_file && has_data_file(bs)) {
  2462. GLOBAL_STATE_CODE();
  2463. bdrv_graph_rdunlock_main_loop();
  2464. bdrv_graph_wrlock();
  2465. bdrv_unref_child(bs, s->data_file);
  2466. bdrv_graph_wrunlock();
  2467. s->data_file = NULL;
  2468. bdrv_graph_rdlock_main_loop();
  2469. }
  2470. qcow2_refcount_close(bs);
  2471. qcow2_free_snapshots(bs);
  2472. }
  2473. static void GRAPH_UNLOCKED qcow2_close(BlockDriverState *bs)
  2474. {
  2475. GLOBAL_STATE_CODE();
  2476. GRAPH_RDLOCK_GUARD_MAINLOOP();
  2477. qcow2_do_close(bs, true);
  2478. }
  2479. static void coroutine_fn GRAPH_RDLOCK
  2480. qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp)
  2481. {
  2482. ERRP_GUARD();
  2483. BDRVQcow2State *s = bs->opaque;
  2484. BdrvChild *data_file;
  2485. int flags = s->flags;
  2486. QCryptoBlock *crypto = NULL;
  2487. QDict *options;
  2488. int ret;
  2489. /*
  2490. * Backing files are read-only which makes all of their metadata immutable,
  2491. * that means we don't have to worry about reopening them here.
  2492. */
  2493. crypto = s->crypto;
  2494. s->crypto = NULL;
  2495. /*
  2496. * Do not reopen s->data_file (i.e., have qcow2_do_close() not close it,
  2497. * and then prevent qcow2_do_open() from opening it), because this function
  2498. * runs in the I/O path and as such we must not invoke global-state
  2499. * functions like bdrv_unref_child() and bdrv_open_child().
  2500. */
  2501. qcow2_do_close(bs, false);
  2502. data_file = s->data_file;
  2503. memset(s, 0, sizeof(BDRVQcow2State));
  2504. s->data_file = data_file;
  2505. options = qdict_clone_shallow(bs->options);
  2506. flags &= ~BDRV_O_INACTIVE;
  2507. qemu_co_mutex_lock(&s->lock);
  2508. ret = qcow2_do_open(bs, options, flags, false, errp);
  2509. qemu_co_mutex_unlock(&s->lock);
  2510. qobject_unref(options);
  2511. if (ret < 0) {
  2512. error_prepend(errp, "Could not reopen qcow2 layer: ");
  2513. bs->drv = NULL;
  2514. return;
  2515. }
  2516. s->crypto = crypto;
  2517. }
  2518. static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
  2519. size_t len, size_t buflen)
  2520. {
  2521. QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
  2522. size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
  2523. if (buflen < ext_len) {
  2524. return -ENOSPC;
  2525. }
  2526. *ext_backing_fmt = (QCowExtension) {
  2527. .magic = cpu_to_be32(magic),
  2528. .len = cpu_to_be32(len),
  2529. };
  2530. if (len) {
  2531. memcpy(buf + sizeof(QCowExtension), s, len);
  2532. }
  2533. return ext_len;
  2534. }
  2535. /*
  2536. * Updates the qcow2 header, including the variable length parts of it, i.e.
  2537. * the backing file name and all extensions. qcow2 was not designed to allow
  2538. * such changes, so if we run out of space (we can only use the first cluster)
  2539. * this function may fail.
  2540. *
  2541. * Returns 0 on success, -errno in error cases.
  2542. */
  2543. int qcow2_update_header(BlockDriverState *bs)
  2544. {
  2545. BDRVQcow2State *s = bs->opaque;
  2546. QCowHeader *header;
  2547. char *buf;
  2548. size_t buflen = s->cluster_size;
  2549. int ret;
  2550. uint64_t total_size;
  2551. uint32_t refcount_table_clusters;
  2552. size_t header_length;
  2553. Qcow2UnknownHeaderExtension *uext;
  2554. buf = qemu_blockalign(bs, buflen);
  2555. /* Header structure */
  2556. header = (QCowHeader*) buf;
  2557. if (buflen < sizeof(*header)) {
  2558. ret = -ENOSPC;
  2559. goto fail;
  2560. }
  2561. header_length = sizeof(*header) + s->unknown_header_fields_size;
  2562. total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
  2563. refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
  2564. ret = validate_compression_type(s, NULL);
  2565. if (ret) {
  2566. goto fail;
  2567. }
  2568. *header = (QCowHeader) {
  2569. /* Version 2 fields */
  2570. .magic = cpu_to_be32(QCOW_MAGIC),
  2571. .version = cpu_to_be32(s->qcow_version),
  2572. .backing_file_offset = 0,
  2573. .backing_file_size = 0,
  2574. .cluster_bits = cpu_to_be32(s->cluster_bits),
  2575. .size = cpu_to_be64(total_size),
  2576. .crypt_method = cpu_to_be32(s->crypt_method_header),
  2577. .l1_size = cpu_to_be32(s->l1_size),
  2578. .l1_table_offset = cpu_to_be64(s->l1_table_offset),
  2579. .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
  2580. .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
  2581. .nb_snapshots = cpu_to_be32(s->nb_snapshots),
  2582. .snapshots_offset = cpu_to_be64(s->snapshots_offset),
  2583. /* Version 3 fields */
  2584. .incompatible_features = cpu_to_be64(s->incompatible_features),
  2585. .compatible_features = cpu_to_be64(s->compatible_features),
  2586. .autoclear_features = cpu_to_be64(s->autoclear_features),
  2587. .refcount_order = cpu_to_be32(s->refcount_order),
  2588. .header_length = cpu_to_be32(header_length),
  2589. .compression_type = s->compression_type,
  2590. };
  2591. /* For older versions, write a shorter header */
  2592. switch (s->qcow_version) {
  2593. case 2:
  2594. ret = offsetof(QCowHeader, incompatible_features);
  2595. break;
  2596. case 3:
  2597. ret = sizeof(*header);
  2598. break;
  2599. default:
  2600. ret = -EINVAL;
  2601. goto fail;
  2602. }
  2603. buf += ret;
  2604. buflen -= ret;
  2605. memset(buf, 0, buflen);
  2606. /* Preserve any unknown field in the header */
  2607. if (s->unknown_header_fields_size) {
  2608. if (buflen < s->unknown_header_fields_size) {
  2609. ret = -ENOSPC;
  2610. goto fail;
  2611. }
  2612. memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
  2613. buf += s->unknown_header_fields_size;
  2614. buflen -= s->unknown_header_fields_size;
  2615. }
  2616. /* Backing file format header extension */
  2617. if (s->image_backing_format) {
  2618. ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
  2619. s->image_backing_format,
  2620. strlen(s->image_backing_format),
  2621. buflen);
  2622. if (ret < 0) {
  2623. goto fail;
  2624. }
  2625. buf += ret;
  2626. buflen -= ret;
  2627. }
  2628. /* External data file header extension */
  2629. if (has_data_file(bs) && s->image_data_file) {
  2630. ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
  2631. s->image_data_file, strlen(s->image_data_file),
  2632. buflen);
  2633. if (ret < 0) {
  2634. goto fail;
  2635. }
  2636. buf += ret;
  2637. buflen -= ret;
  2638. }
  2639. /* Full disk encryption header pointer extension */
  2640. if (s->crypto_header.offset != 0) {
  2641. s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
  2642. s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
  2643. ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
  2644. &s->crypto_header, sizeof(s->crypto_header),
  2645. buflen);
  2646. s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
  2647. s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
  2648. if (ret < 0) {
  2649. goto fail;
  2650. }
  2651. buf += ret;
  2652. buflen -= ret;
  2653. }
  2654. /*
  2655. * Feature table. A mere 8 feature names occupies 392 bytes, and
  2656. * when coupled with the v3 minimum header of 104 bytes plus the
  2657. * 8-byte end-of-extension marker, that would leave only 8 bytes
  2658. * for a backing file name in an image with 512-byte clusters.
  2659. * Thus, we choose to omit this header for cluster sizes 4k and
  2660. * smaller.
  2661. */
  2662. if (s->qcow_version >= 3 && s->cluster_size > 4096) {
  2663. static const Qcow2Feature features[] = {
  2664. {
  2665. .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
  2666. .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
  2667. .name = "dirty bit",
  2668. },
  2669. {
  2670. .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
  2671. .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
  2672. .name = "corrupt bit",
  2673. },
  2674. {
  2675. .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
  2676. .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR,
  2677. .name = "external data file",
  2678. },
  2679. {
  2680. .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
  2681. .bit = QCOW2_INCOMPAT_COMPRESSION_BITNR,
  2682. .name = "compression type",
  2683. },
  2684. {
  2685. .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
  2686. .bit = QCOW2_INCOMPAT_EXTL2_BITNR,
  2687. .name = "extended L2 entries",
  2688. },
  2689. {
  2690. .type = QCOW2_FEAT_TYPE_COMPATIBLE,
  2691. .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
  2692. .name = "lazy refcounts",
  2693. },
  2694. {
  2695. .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
  2696. .bit = QCOW2_AUTOCLEAR_BITMAPS_BITNR,
  2697. .name = "bitmaps",
  2698. },
  2699. {
  2700. .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
  2701. .bit = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR,
  2702. .name = "raw external data",
  2703. },
  2704. };
  2705. ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
  2706. features, sizeof(features), buflen);
  2707. if (ret < 0) {
  2708. goto fail;
  2709. }
  2710. buf += ret;
  2711. buflen -= ret;
  2712. }
  2713. /* Bitmap extension */
  2714. if (s->nb_bitmaps > 0) {
  2715. Qcow2BitmapHeaderExt bitmaps_header = {
  2716. .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
  2717. .bitmap_directory_size =
  2718. cpu_to_be64(s->bitmap_directory_size),
  2719. .bitmap_directory_offset =
  2720. cpu_to_be64(s->bitmap_directory_offset)
  2721. };
  2722. ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
  2723. &bitmaps_header, sizeof(bitmaps_header),
  2724. buflen);
  2725. if (ret < 0) {
  2726. goto fail;
  2727. }
  2728. buf += ret;
  2729. buflen -= ret;
  2730. }
  2731. /* Keep unknown header extensions */
  2732. QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
  2733. ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
  2734. if (ret < 0) {
  2735. goto fail;
  2736. }
  2737. buf += ret;
  2738. buflen -= ret;
  2739. }
  2740. /* End of header extensions */
  2741. ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
  2742. if (ret < 0) {
  2743. goto fail;
  2744. }
  2745. buf += ret;
  2746. buflen -= ret;
  2747. /* Backing file name */
  2748. if (s->image_backing_file) {
  2749. size_t backing_file_len = strlen(s->image_backing_file);
  2750. if (buflen < backing_file_len) {
  2751. ret = -ENOSPC;
  2752. goto fail;
  2753. }
  2754. /* Using strncpy is ok here, since buf is not NUL-terminated. */
  2755. strncpy(buf, s->image_backing_file, buflen);
  2756. header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
  2757. header->backing_file_size = cpu_to_be32(backing_file_len);
  2758. }
  2759. /* Write the new header */
  2760. ret = bdrv_pwrite(bs->file, 0, s->cluster_size, header, 0);
  2761. if (ret < 0) {
  2762. goto fail;
  2763. }
  2764. ret = 0;
  2765. fail:
  2766. qemu_vfree(header);
  2767. return ret;
  2768. }
  2769. static int coroutine_fn GRAPH_RDLOCK
  2770. qcow2_co_change_backing_file(BlockDriverState *bs, const char *backing_file,
  2771. const char *backing_fmt)
  2772. {
  2773. BDRVQcow2State *s = bs->opaque;
  2774. /* Adding a backing file means that the external data file alone won't be
  2775. * enough to make sense of the content */
  2776. if (backing_file && data_file_is_raw(bs)) {
  2777. return -EINVAL;
  2778. }
  2779. if (backing_file && strlen(backing_file) > 1023) {
  2780. return -EINVAL;
  2781. }
  2782. pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
  2783. backing_file ?: "");
  2784. pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
  2785. pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
  2786. g_free(s->image_backing_file);
  2787. g_free(s->image_backing_format);
  2788. s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
  2789. s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
  2790. return qcow2_update_header(bs);
  2791. }
  2792. static int coroutine_fn GRAPH_RDLOCK
  2793. qcow2_set_up_encryption(BlockDriverState *bs,
  2794. QCryptoBlockCreateOptions *cryptoopts,
  2795. Error **errp)
  2796. {
  2797. BDRVQcow2State *s = bs->opaque;
  2798. QCryptoBlock *crypto = NULL;
  2799. int fmt, ret;
  2800. switch (cryptoopts->format) {
  2801. case QCRYPTO_BLOCK_FORMAT_LUKS:
  2802. fmt = QCOW_CRYPT_LUKS;
  2803. break;
  2804. case QCRYPTO_BLOCK_FORMAT_QCOW:
  2805. fmt = QCOW_CRYPT_AES;
  2806. break;
  2807. default:
  2808. error_setg(errp, "Crypto format not supported in qcow2");
  2809. return -EINVAL;
  2810. }
  2811. s->crypt_method_header = fmt;
  2812. crypto = qcrypto_block_create(cryptoopts, "encrypt.",
  2813. qcow2_crypto_hdr_init_func,
  2814. qcow2_crypto_hdr_write_func,
  2815. bs, 0, errp);
  2816. if (!crypto) {
  2817. return -EINVAL;
  2818. }
  2819. ret = qcow2_update_header(bs);
  2820. if (ret < 0) {
  2821. error_setg_errno(errp, -ret, "Could not write encryption header");
  2822. goto out;
  2823. }
  2824. ret = 0;
  2825. out:
  2826. qcrypto_block_free(crypto);
  2827. return ret;
  2828. }
  2829. /**
  2830. * Preallocates metadata structures for data clusters between @offset (in the
  2831. * guest disk) and @new_length (which is thus generally the new guest disk
  2832. * size).
  2833. *
  2834. * Returns: 0 on success, -errno on failure.
  2835. */
  2836. static int coroutine_fn GRAPH_RDLOCK
  2837. preallocate_co(BlockDriverState *bs, uint64_t offset, uint64_t new_length,
  2838. PreallocMode mode, Error **errp)
  2839. {
  2840. BDRVQcow2State *s = bs->opaque;
  2841. uint64_t bytes;
  2842. uint64_t host_offset = 0;
  2843. int64_t file_length;
  2844. unsigned int cur_bytes;
  2845. int ret;
  2846. QCowL2Meta *meta = NULL, *m;
  2847. assert(offset <= new_length);
  2848. bytes = new_length - offset;
  2849. while (bytes) {
  2850. cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
  2851. ret = qcow2_alloc_host_offset(bs, offset, &cur_bytes,
  2852. &host_offset, &meta);
  2853. if (ret < 0) {
  2854. error_setg_errno(errp, -ret, "Allocating clusters failed");
  2855. goto out;
  2856. }
  2857. for (m = meta; m != NULL; m = m->next) {
  2858. m->prealloc = true;
  2859. }
  2860. ret = qcow2_handle_l2meta(bs, &meta, true);
  2861. if (ret < 0) {
  2862. error_setg_errno(errp, -ret, "Mapping clusters failed");
  2863. goto out;
  2864. }
  2865. /* TODO Preallocate data if requested */
  2866. bytes -= cur_bytes;
  2867. offset += cur_bytes;
  2868. }
  2869. /*
  2870. * It is expected that the image file is large enough to actually contain
  2871. * all of the allocated clusters (otherwise we get failing reads after
  2872. * EOF). Extend the image to the last allocated sector.
  2873. */
  2874. file_length = bdrv_co_getlength(s->data_file->bs);
  2875. if (file_length < 0) {
  2876. error_setg_errno(errp, -file_length, "Could not get file size");
  2877. ret = file_length;
  2878. goto out;
  2879. }
  2880. if (host_offset + cur_bytes > file_length) {
  2881. if (mode == PREALLOC_MODE_METADATA) {
  2882. mode = PREALLOC_MODE_OFF;
  2883. }
  2884. ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
  2885. mode, 0, errp);
  2886. if (ret < 0) {
  2887. goto out;
  2888. }
  2889. }
  2890. ret = 0;
  2891. out:
  2892. qcow2_handle_l2meta(bs, &meta, false);
  2893. return ret;
  2894. }
  2895. /* qcow2_refcount_metadata_size:
  2896. * @clusters: number of clusters to refcount (including data and L1/L2 tables)
  2897. * @cluster_size: size of a cluster, in bytes
  2898. * @refcount_order: refcount bits power-of-2 exponent
  2899. * @generous_increase: allow for the refcount table to be 1.5x as large as it
  2900. * needs to be
  2901. *
  2902. * Returns: Number of bytes required for refcount blocks and table metadata.
  2903. */
  2904. int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
  2905. int refcount_order, bool generous_increase,
  2906. uint64_t *refblock_count)
  2907. {
  2908. /*
  2909. * Every host cluster is reference-counted, including metadata (even
  2910. * refcount metadata is recursively included).
  2911. *
  2912. * An accurate formula for the size of refcount metadata size is difficult
  2913. * to derive. An easier method of calculation is finding the fixed point
  2914. * where no further refcount blocks or table clusters are required to
  2915. * reference count every cluster.
  2916. */
  2917. int64_t blocks_per_table_cluster = cluster_size / REFTABLE_ENTRY_SIZE;
  2918. int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order);
  2919. int64_t table = 0; /* number of refcount table clusters */
  2920. int64_t blocks = 0; /* number of refcount block clusters */
  2921. int64_t last;
  2922. int64_t n = 0;
  2923. do {
  2924. last = n;
  2925. blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block);
  2926. table = DIV_ROUND_UP(blocks, blocks_per_table_cluster);
  2927. n = clusters + blocks + table;
  2928. if (n == last && generous_increase) {
  2929. clusters += DIV_ROUND_UP(table, 2);
  2930. n = 0; /* force another loop */
  2931. generous_increase = false;
  2932. }
  2933. } while (n != last);
  2934. if (refblock_count) {
  2935. *refblock_count = blocks;
  2936. }
  2937. return (blocks + table) * cluster_size;
  2938. }
  2939. /**
  2940. * qcow2_calc_prealloc_size:
  2941. * @total_size: virtual disk size in bytes
  2942. * @cluster_size: cluster size in bytes
  2943. * @refcount_order: refcount bits power-of-2 exponent
  2944. * @extended_l2: true if the image has extended L2 entries
  2945. *
  2946. * Returns: Total number of bytes required for the fully allocated image
  2947. * (including metadata).
  2948. */
  2949. static int64_t qcow2_calc_prealloc_size(int64_t total_size,
  2950. size_t cluster_size,
  2951. int refcount_order,
  2952. bool extended_l2)
  2953. {
  2954. int64_t meta_size = 0;
  2955. uint64_t nl1e, nl2e;
  2956. int64_t aligned_total_size = ROUND_UP(total_size, cluster_size);
  2957. size_t l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL;
  2958. /* header: 1 cluster */
  2959. meta_size += cluster_size;
  2960. /* total size of L2 tables */
  2961. nl2e = aligned_total_size / cluster_size;
  2962. nl2e = ROUND_UP(nl2e, cluster_size / l2e_size);
  2963. meta_size += nl2e * l2e_size;
  2964. /* total size of L1 tables */
  2965. nl1e = nl2e * l2e_size / cluster_size;
  2966. nl1e = ROUND_UP(nl1e, cluster_size / L1E_SIZE);
  2967. meta_size += nl1e * L1E_SIZE;
  2968. /* total size of refcount table and blocks */
  2969. meta_size += qcow2_refcount_metadata_size(
  2970. (meta_size + aligned_total_size) / cluster_size,
  2971. cluster_size, refcount_order, false, NULL);
  2972. return meta_size + aligned_total_size;
  2973. }
  2974. static bool validate_cluster_size(size_t cluster_size, bool extended_l2,
  2975. Error **errp)
  2976. {
  2977. int cluster_bits = ctz32(cluster_size);
  2978. if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
  2979. (1 << cluster_bits) != cluster_size)
  2980. {
  2981. error_setg(errp, "Cluster size must be a power of two between %d and "
  2982. "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
  2983. return false;
  2984. }
  2985. if (extended_l2) {
  2986. unsigned min_cluster_size =
  2987. (1 << MIN_CLUSTER_BITS) * QCOW_EXTL2_SUBCLUSTERS_PER_CLUSTER;
  2988. if (cluster_size < min_cluster_size) {
  2989. error_setg(errp, "Extended L2 entries are only supported with "
  2990. "cluster sizes of at least %u bytes", min_cluster_size);
  2991. return false;
  2992. }
  2993. }
  2994. return true;
  2995. }
  2996. static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, bool extended_l2,
  2997. Error **errp)
  2998. {
  2999. size_t cluster_size;
  3000. cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
  3001. DEFAULT_CLUSTER_SIZE);
  3002. if (!validate_cluster_size(cluster_size, extended_l2, errp)) {
  3003. return 0;
  3004. }
  3005. return cluster_size;
  3006. }
  3007. static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
  3008. {
  3009. char *buf;
  3010. int ret;
  3011. buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
  3012. if (!buf) {
  3013. ret = 3; /* default */
  3014. } else if (!strcmp(buf, "0.10")) {
  3015. ret = 2;
  3016. } else if (!strcmp(buf, "1.1")) {
  3017. ret = 3;
  3018. } else {
  3019. error_setg(errp, "Invalid compatibility level: '%s'", buf);
  3020. ret = -EINVAL;
  3021. }
  3022. g_free(buf);
  3023. return ret;
  3024. }
  3025. static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
  3026. Error **errp)
  3027. {
  3028. uint64_t refcount_bits;
  3029. refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
  3030. if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
  3031. error_setg(errp, "Refcount width must be a power of two and may not "
  3032. "exceed 64 bits");
  3033. return 0;
  3034. }
  3035. if (version < 3 && refcount_bits != 16) {
  3036. error_setg(errp, "Different refcount widths than 16 bits require "
  3037. "compatibility level 1.1 or above (use compat=1.1 or "
  3038. "greater)");
  3039. return 0;
  3040. }
  3041. return refcount_bits;
  3042. }
/*
 * Create a qcow2 image as described by @create_options (which must be for
 * BLOCKDEV_DRIVER_QCOW2) on the node referenced by its 'file' member.
 *
 * The function proceeds in stages:
 *  1. validate the options and fill in defaults for absent ones;
 *  2. write a minimal header plus a refcount table with one refcount block
 *     directly to the protocol node;
 *  3. open the result with the qcow2 driver, allocate the three clusters
 *     already in use and write a full header;
 *  4. resize the image to the requested size, then set up backing file and
 *     encryption if requested;
 *  5. reopen the image without BDRV_O_NO_FLUSH so that it is flushed.
 *
 * Returns: 0 on success, a negative errno value on failure. All references
 * taken here (blk, bs, data_bs) are dropped before returning.
 */
static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
    ERRP_GUARD();
    BlockdevCreateOptionsQcow2 *qcow2_opts;
    QDict *options;

    /*
     * Open the image file and write a minimal qcow2 header.
     *
     * We keep things simple and start with a zero-sized image. We also
     * do without refcount blocks or a L1 table for now. We'll fix the
     * inconsistency later.
     *
     * We do need a refcount table because growing the refcount table means
     * allocating two new refcount blocks - the second of which would be at
     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
     * size for any qcow2 image.
     */
    BlockBackend *blk = NULL;
    BlockDriverState *bs = NULL;
    BlockDriverState *data_bs = NULL;
    QCowHeader *header;
    size_t cluster_size;
    int version;
    int refcount_order;
    uint64_t *refcount_table;
    int ret;
    uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;

    assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
    qcow2_opts = &create_options->u.qcow2;

    /* Nothing else has been acquired yet, so a plain return is enough here */
    bs = bdrv_co_open_blockdev_ref(qcow2_opts->file, errp);
    if (bs == NULL) {
        return -EIO;
    }

    /* Validate options and set default values */
    if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "Image size must be a multiple of %u bytes",
                   (unsigned) BDRV_SECTOR_SIZE);
        ret = -EINVAL;
        goto out;
    }

    /* Image format version; v3 is the default */
    if (qcow2_opts->has_version) {
        switch (qcow2_opts->version) {
        case BLOCKDEV_QCOW2_VERSION_V2:
            version = 2;
            break;
        case BLOCKDEV_QCOW2_VERSION_V3:
            version = 3;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        version = 3;
    }

    if (qcow2_opts->has_cluster_size) {
        cluster_size = qcow2_opts->cluster_size;
    } else {
        cluster_size = DEFAULT_CLUSTER_SIZE;
    }

    if (!qcow2_opts->has_extended_l2) {
        qcow2_opts->extended_l2 = false;
    }
    if (qcow2_opts->extended_l2) {
        if (version < 3) {
            error_setg(errp, "Extended L2 entries are only supported with "
                       "compatibility level 1.1 and above (use version=v3 or "
                       "greater)");
            ret = -EINVAL;
            goto out;
        }
    }

    /* Must come after extended_l2 got its default: the check depends on it */
    if (!validate_cluster_size(cluster_size, qcow2_opts->extended_l2, errp)) {
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_preallocation) {
        qcow2_opts->preallocation = PREALLOC_MODE_OFF;
    }
    if (qcow2_opts->backing_file &&
        qcow2_opts->preallocation != PREALLOC_MODE_OFF &&
        !qcow2_opts->extended_l2)
    {
        error_setg(errp, "Backing file and preallocation can only be used at "
                   "the same time if extended_l2 is on");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->has_backing_fmt && !qcow2_opts->backing_file) {
        error_setg(errp, "Backing format cannot be used without backing file");
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_lazy_refcounts) {
        qcow2_opts->lazy_refcounts = false;
    }
    if (version < 3 && qcow2_opts->lazy_refcounts) {
        error_setg(errp, "Lazy refcounts only supported with compatibility "
                   "level 1.1 and above (use version=v3 or greater)");
        ret = -EINVAL;
        goto out;
    }

    /* Refcount width: default 16 bits, the only width allowed before v3 */
    if (!qcow2_opts->has_refcount_bits) {
        qcow2_opts->refcount_bits = 16;
    }
    if (qcow2_opts->refcount_bits > 64 ||
        !is_power_of_2(qcow2_opts->refcount_bits))
    {
        error_setg(errp, "Refcount width must be a power of two and may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto out;
    }
    if (version < 3 && qcow2_opts->refcount_bits != 16) {
        error_setg(errp, "Different refcount widths than 16 bits require "
                   "compatibility level 1.1 or above (use version=v3 or "
                   "greater)");
        ret = -EINVAL;
        goto out;
    }
    /* refcount_bits is a power of two here, so this is its exponent */
    refcount_order = ctz32(qcow2_opts->refcount_bits);

    if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
        error_setg(errp, "data-file-raw requires data-file");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->data_file_raw && qcow2_opts->backing_file) {
        error_setg(errp, "Backing file and data-file-raw cannot be used at "
                   "the same time");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->data_file_raw &&
        qcow2_opts->preallocation == PREALLOC_MODE_OFF)
    {
        /*
         * data-file-raw means that "the external data file can be
         * read as a consistent standalone raw image without looking
         * at the qcow2 metadata."  It does not say that the metadata
         * must be ignored, though (and the qcow2 driver in fact does
         * not ignore it), so the L1/L2 tables must be present and
         * give a 1:1 mapping, so you get the same result regardless
         * of whether you look at the metadata or whether you ignore
         * it.
         */
        qcow2_opts->preallocation = PREALLOC_MODE_METADATA;

        /*
         * Cannot use preallocation with backing files, but giving a
         * backing file when specifying data_file_raw is an error
         * anyway.
         */
        assert(!qcow2_opts->backing_file);
    }

    if (qcow2_opts->data_file) {
        if (version < 3) {
            error_setg(errp, "External data files are only supported with "
                       "compatibility level 1.1 and above (use version=v3 or "
                       "greater)");
            ret = -EINVAL;
            goto out;
        }
        data_bs = bdrv_co_open_blockdev_ref(qcow2_opts->data_file, errp);
        if (data_bs == NULL) {
            ret = -EIO;
            goto out;
        }
    }

    if (qcow2_opts->has_compression_type &&
        qcow2_opts->compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) {

        /* Shared by both error exits below */
        ret = -EINVAL;

        if (version < 3) {
            error_setg(errp, "Non-zlib compression type is only supported with "
                       "compatibility level 1.1 and above (use version=v3 or "
                       "greater)");
            goto out;
        }

        switch (qcow2_opts->compression_type) {
#ifdef CONFIG_ZSTD
        case QCOW2_COMPRESSION_TYPE_ZSTD:
            break;
#endif
        default:
            error_setg(errp, "Unknown compression type");
            goto out;
        }

        compression_type = qcow2_opts->compression_type;
    }

    /* Create BlockBackend to write to the image */
    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
                             errp);
    if (!blk) {
        ret = -EPERM;
        goto out;
    }
    blk_set_allow_write_beyond_eof(blk, true);

    /* Write the header */
    /* The smallest possible cluster must be able to hold the header struct */
    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
    header = g_malloc0(cluster_size);
    *header = (QCowHeader) {
        .magic                      = cpu_to_be32(QCOW_MAGIC),
        .version                    = cpu_to_be32(version),
        .cluster_bits               = cpu_to_be32(ctz32(cluster_size)),
        .size                       = cpu_to_be64(0),
        .l1_table_offset            = cpu_to_be64(0),
        .l1_size                    = cpu_to_be32(0),
        .refcount_table_offset      = cpu_to_be64(cluster_size),
        .refcount_table_clusters    = cpu_to_be32(1),
        .refcount_order             = cpu_to_be32(refcount_order),
        /* don't deal with endianness since compression_type is 1 byte long */
        .compression_type           = compression_type,
        .header_length              = cpu_to_be32(sizeof(*header)),
    };

    /* We'll update this to correct value later */
    header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);

    /* Feature bits implied by the selected options */
    if (qcow2_opts->lazy_refcounts) {
        header->compatible_features |=
            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
    }
    if (data_bs) {
        header->incompatible_features |=
            cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
    }
    if (qcow2_opts->data_file_raw) {
        header->autoclear_features |=
            cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
    }
    if (compression_type != QCOW2_COMPRESSION_TYPE_ZLIB) {
        header->incompatible_features |=
            cpu_to_be64(QCOW2_INCOMPAT_COMPRESSION);
    }

    if (qcow2_opts->extended_l2) {
        header->incompatible_features |=
            cpu_to_be64(QCOW2_INCOMPAT_EXTL2);
    }

    /* The buffer is freed right after the write, before ret is examined */
    ret = blk_co_pwrite(blk, 0, cluster_size, header, 0);
    g_free(header);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write qcow2 header");
        goto out;
    }

    /* Write a refcount table with one refcount block */
    /*
     * The two-cluster buffer is written at offset cluster_size; its first
     * entry points at offset 2 * cluster_size, i.e. at the second cluster of
     * this same buffer, which therefore acts as the (zeroed) refcount block.
     */
    refcount_table = g_malloc0(2 * cluster_size);
    refcount_table[0] = cpu_to_be64(2 * cluster_size);
    ret = blk_co_pwrite(blk, cluster_size, 2 * cluster_size, refcount_table, 0);
    g_free(refcount_table);

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write refcount table");
        goto out;
    }

    /* Reset to NULL so that the unref at 'out' does not drop it again */
    blk_co_unref(blk);
    blk = NULL;

    /*
     * And now open the image and make it consistent first (i.e. increase the
     * refcount of the cluster that is occupied by the header and the refcount
     * table)
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    if (data_bs) {
        qdict_put_str(options, "data-file", data_bs->node_name);
    }
    blk = blk_co_new_open(NULL, NULL, options,
                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
                          errp);
    if (blk == NULL) {
        ret = -EIO;
        goto out;
    }

    /* The graph read lock is held until the full header has been written */
    bdrv_graph_co_rdlock();
    /* Header + refcount table + refcount block = the first three clusters */
    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
    if (ret < 0) {
        bdrv_graph_co_rdunlock();
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
        goto out;

    } else if (ret != 0) {
        /* Anything but offset 0 means the image was not empty after all */
        error_report("Huh, first cluster in empty image is already in use?");
        abort();
    }

    /* Set the external data file if necessary */
    if (data_bs) {
        BDRVQcow2State *s = blk_bs(blk)->opaque;
        s->image_data_file = g_strdup(data_bs->filename);
    }

    /* Create a full header (including things like feature table) */
    ret = qcow2_update_header(blk_bs(blk));
    bdrv_graph_co_rdunlock();

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not update qcow2 header");
        goto out;
    }

    /* Okay, now that we have a valid image, let's give it the right size */
    ret = blk_co_truncate(blk, qcow2_opts->size, false,
                          qcow2_opts->preallocation, 0, errp);
    if (ret < 0) {
        error_prepend(errp, "Could not resize image: ");
        goto out;
    }

    /* Want a backing file? There you go. */
    if (qcow2_opts->backing_file) {
        const char *backing_format = NULL;

        if (qcow2_opts->has_backing_fmt) {
            backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
        }

        bdrv_graph_co_rdlock();
        ret = bdrv_co_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
                                          backing_format, false);
        bdrv_graph_co_rdunlock();

        if (ret < 0) {
            /*
             * NOTE(review): backing_format is still NULL here when no
             * backing format was given, yet it is passed to '%s'.
             */
            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                             "with format '%s'", qcow2_opts->backing_file,
                             backing_format);
            goto out;
        }
    }

    /* Want encryption? There you go. */
    if (qcow2_opts->encrypt) {
        bdrv_graph_co_rdlock();
        ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
        bdrv_graph_co_rdunlock();

        if (ret < 0) {
            goto out;
        }
    }

    blk_co_unref(blk);
    blk = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
     * Using BDRV_O_NO_IO, since encryption is now setup we don't want to
     * have to setup decryption context. We're not doing any I/O on the top
     * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
     * not have effect.
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    if (data_bs) {
        qdict_put_str(options, "data-file", data_bs->node_name);
    }
    blk = blk_co_new_open(NULL, NULL, options,
                          BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
                          errp);
    if (blk == NULL) {
        ret = -EIO;
        goto out;
    }

    ret = 0;
out:
    /* Common exit: blk and data_bs may still be NULL at this point */
    blk_co_unref(blk);
    bdrv_co_unref(bs);
    bdrv_co_unref(data_bs);
    return ret;
}
/*
 * Legacy (QemuOpts based) image creation entry point.
 *
 * Converts @opts into a BlockdevCreateOptions object, creates and opens the
 * protocol-level file @filename (and an external data file, if one is
 * requested), and then hands over to qcow2_co_create().
 *
 * @drv is not used by this function.
 *
 * Returns: 0 on success, a negative errno value on failure. On failure the
 * files opened here are deleted again on a best-effort basis.
 */
static int coroutine_fn GRAPH_UNLOCKED
qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
                     Error **errp)
{
    BlockdevCreateOptions *create_options = NULL;
    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    BlockDriverState *data_bs = NULL;
    const char *val;
    int ret;

    /* Only the keyval visitor supports the dotted syntax needed for
     * encryption, so go through a QDict before getting a QAPI type. Ignore
     * options meant for the protocol layer so that the visitor doesn't
     * complain. */
    qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
                                        true);

    /* Handle encryption options */
    /* "on" is mapped to the "qcow" format, "off" means no encryption */
    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
    if (val && !strcmp(val, "on")) {
        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
    } else if (val && !strcmp(val, "off")) {
        qdict_del(qdict, BLOCK_OPT_ENCRYPT);
    }

    /* "aes" is accepted as another spelling of the "qcow" format */
    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
    if (val && !strcmp(val, "aes")) {
        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
    }

    /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into
     * version=v2/v3 below. */
    val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
    if (val && !strcmp(val, "0.10")) {
        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
    } else if (val && !strcmp(val, "1.1")) {
        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
    }

    /* Change legacy command line options into QMP ones */
    static const QDictRenames opt_renames[] = {
        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
        { BLOCK_OPT_BACKING_FMT,        "backing-fmt" },
        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
        { BLOCK_OPT_LAZY_REFCOUNTS,     "lazy-refcounts" },
        { BLOCK_OPT_EXTL2,              "extended-l2" },
        { BLOCK_OPT_REFCOUNT_BITS,      "refcount-bits" },
        { BLOCK_OPT_ENCRYPT,            BLOCK_OPT_ENCRYPT_FORMAT },
        { BLOCK_OPT_COMPAT_LEVEL,       "version" },
        { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
        { BLOCK_OPT_COMPRESSION_TYPE,   "compression-type" },
        { NULL, NULL },
    };

    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
        ret = -EINVAL;
        goto finish;
    }

    /* Create and open the file (protocol layer) */
    ret = bdrv_co_create_file(filename, opts, errp);
    if (ret < 0) {
        goto finish;
    }

    bs = bdrv_co_open(filename, NULL, NULL,
                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
    if (bs == NULL) {
        ret = -EIO;
        goto finish;
    }

    /* Create and open an external data file (protocol layer) */
    val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
    if (val) {
        ret = bdrv_co_create_file(val, opts, errp);
        if (ret < 0) {
            goto finish;
        }

        data_bs = bdrv_co_open(val, NULL, NULL,
                               BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
                               errp);
        if (data_bs == NULL) {
            ret = -EIO;
            goto finish;
        }

        /* Replace the legacy file name option by a node name reference */
        qdict_del(qdict, BLOCK_OPT_DATA_FILE);
        qdict_put_str(qdict, "data-file", data_bs->node_name);
    }

    /* Set 'driver' and 'node' options */
    qdict_put_str(qdict, "driver", "qcow2");
    qdict_put_str(qdict, "file", bs->node_name);

    /* Now get the QAPI type BlockdevCreateOptions */
    v = qobject_input_visitor_new_flat_confused(qdict, errp);
    if (!v) {
        ret = -EINVAL;
        goto finish;
    }

    visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
    visit_free(v);
    if (!create_options) {
        ret = -EINVAL;
        goto finish;
    }

    /* Silently round up size */
    create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
                                            BDRV_SECTOR_SIZE);

    /* Create the qcow2 image (format layer) */
    ret = qcow2_co_create(create_options, errp);

finish:
    if (ret < 0) {
        /*
         * Remove what was created above; bs and/or data_bs may still be
         * NULL here, depending on how far the function got.
         */
        bdrv_graph_co_rdlock();
        bdrv_co_delete_file_noerr(bs);
        bdrv_co_delete_file_noerr(data_bs);
        bdrv_graph_co_rdunlock();
    } else {
        /* Normalize any non-negative result to 0 */
        ret = 0;
    }

    qobject_unref(qdict);
    bdrv_co_unref(bs);
    bdrv_co_unref(data_bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
}
  3513. static bool coroutine_fn GRAPH_RDLOCK
  3514. is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
  3515. {
  3516. int64_t nr;
  3517. int res;
  3518. /* Clamp to image length, before checking status of underlying sectors */
  3519. if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
  3520. bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
  3521. }
  3522. if (!bytes) {
  3523. return true;
  3524. }
  3525. /*
  3526. * bdrv_block_status_above doesn't merge different types of zeros, for
  3527. * example, zeros which come from the region which is unallocated in
  3528. * the whole backing chain, and zeros which come because of a short
  3529. * backing file. So, we need a loop.
  3530. */
  3531. do {
  3532. res = bdrv_co_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
  3533. offset += nr;
  3534. bytes -= nr;
  3535. } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
  3536. return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
  3537. }
  3538. static int coroutine_fn GRAPH_RDLOCK
  3539. qcow2_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
  3540. BdrvRequestFlags flags)
  3541. {
  3542. int ret;
  3543. BDRVQcow2State *s = bs->opaque;
  3544. uint32_t head = offset_into_subcluster(s, offset);
  3545. uint32_t tail = ROUND_UP(offset + bytes, s->subcluster_size) -
  3546. (offset + bytes);
  3547. trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
  3548. if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
  3549. tail = 0;
  3550. }
  3551. if (head || tail) {
  3552. uint64_t off;
  3553. unsigned int nr;
  3554. QCow2SubclusterType type;
  3555. assert(head + bytes + tail <= s->subcluster_size);
  3556. /* check whether remainder of cluster already reads as zero */
  3557. if (!(is_zero(bs, offset - head, head) &&
  3558. is_zero(bs, offset + bytes, tail))) {
  3559. return -ENOTSUP;
  3560. }
  3561. qemu_co_mutex_lock(&s->lock);
  3562. /* We can have new write after previous check */
  3563. offset -= head;
  3564. bytes = s->subcluster_size;
  3565. nr = s->subcluster_size;
  3566. ret = qcow2_get_host_offset(bs, offset, &nr, &off, &type);
  3567. if (ret < 0 ||
  3568. (type != QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN &&
  3569. type != QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC &&
  3570. type != QCOW2_SUBCLUSTER_ZERO_PLAIN &&
  3571. type != QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
  3572. qemu_co_mutex_unlock(&s->lock);
  3573. return ret < 0 ? ret : -ENOTSUP;
  3574. }
  3575. } else {
  3576. qemu_co_mutex_lock(&s->lock);
  3577. }
  3578. trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
  3579. /* Whatever is left can use real zero subclusters */
  3580. ret = qcow2_subcluster_zeroize(bs, offset, bytes, flags);
  3581. qemu_co_mutex_unlock(&s->lock);
  3582. return ret;
  3583. }
  3584. static int coroutine_fn GRAPH_RDLOCK
  3585. qcow2_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
  3586. {
  3587. int ret;
  3588. BDRVQcow2State *s = bs->opaque;
  3589. /* If the image does not support QCOW_OFLAG_ZERO then discarding
  3590. * clusters could expose stale data from the backing file. */
  3591. if (s->qcow_version < 3 && bs->backing) {
  3592. return -ENOTSUP;
  3593. }
  3594. if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
  3595. assert(bytes < s->cluster_size);
  3596. /* Ignore partial clusters, except for the special case of the
  3597. * complete partial cluster at the end of an unaligned file */
  3598. if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
  3599. offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
  3600. return -ENOTSUP;
  3601. }
  3602. }
  3603. qemu_co_mutex_lock(&s->lock);
  3604. ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
  3605. false);
  3606. qemu_co_mutex_unlock(&s->lock);
  3607. return ret;
  3608. }
  3609. static int coroutine_fn GRAPH_RDLOCK
  3610. qcow2_co_copy_range_from(BlockDriverState *bs,
  3611. BdrvChild *src, int64_t src_offset,
  3612. BdrvChild *dst, int64_t dst_offset,
  3613. int64_t bytes, BdrvRequestFlags read_flags,
  3614. BdrvRequestFlags write_flags)
  3615. {
  3616. BDRVQcow2State *s = bs->opaque;
  3617. int ret;
  3618. unsigned int cur_bytes; /* number of bytes in current iteration */
  3619. BdrvChild *child = NULL;
  3620. BdrvRequestFlags cur_write_flags;
  3621. assert(!bs->encrypted);
  3622. qemu_co_mutex_lock(&s->lock);
  3623. while (bytes != 0) {
  3624. uint64_t copy_offset = 0;
  3625. QCow2SubclusterType type;
  3626. /* prepare next request */
  3627. cur_bytes = MIN(bytes, INT_MAX);
  3628. cur_write_flags = write_flags;
  3629. ret = qcow2_get_host_offset(bs, src_offset, &cur_bytes,
  3630. &copy_offset, &type);
  3631. if (ret < 0) {
  3632. goto out;
  3633. }
  3634. switch (type) {
  3635. case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
  3636. case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
  3637. if (bs->backing && bs->backing->bs) {
  3638. int64_t backing_length = bdrv_co_getlength(bs->backing->bs);
  3639. if (src_offset >= backing_length) {
  3640. cur_write_flags |= BDRV_REQ_ZERO_WRITE;
  3641. } else {
  3642. child = bs->backing;
  3643. cur_bytes = MIN(cur_bytes, backing_length - src_offset);
  3644. copy_offset = src_offset;
  3645. }
  3646. } else {
  3647. cur_write_flags |= BDRV_REQ_ZERO_WRITE;
  3648. }
  3649. break;
  3650. case QCOW2_SUBCLUSTER_ZERO_PLAIN:
  3651. case QCOW2_SUBCLUSTER_ZERO_ALLOC:
  3652. cur_write_flags |= BDRV_REQ_ZERO_WRITE;
  3653. break;
  3654. case QCOW2_SUBCLUSTER_COMPRESSED:
  3655. ret = -ENOTSUP;
  3656. goto out;
  3657. case QCOW2_SUBCLUSTER_NORMAL:
  3658. child = s->data_file;
  3659. break;
  3660. default:
  3661. abort();
  3662. }
  3663. qemu_co_mutex_unlock(&s->lock);
  3664. ret = bdrv_co_copy_range_from(child,
  3665. copy_offset,
  3666. dst, dst_offset,
  3667. cur_bytes, read_flags, cur_write_flags);
  3668. qemu_co_mutex_lock(&s->lock);
  3669. if (ret < 0) {
  3670. goto out;
  3671. }
  3672. bytes -= cur_bytes;
  3673. src_offset += cur_bytes;
  3674. dst_offset += cur_bytes;
  3675. }
  3676. ret = 0;
  3677. out:
  3678. qemu_co_mutex_unlock(&s->lock);
  3679. return ret;
  3680. }
  3681. static int coroutine_fn GRAPH_RDLOCK
  3682. qcow2_co_copy_range_to(BlockDriverState *bs,
  3683. BdrvChild *src, int64_t src_offset,
  3684. BdrvChild *dst, int64_t dst_offset,
  3685. int64_t bytes, BdrvRequestFlags read_flags,
  3686. BdrvRequestFlags write_flags)
  3687. {
  3688. BDRVQcow2State *s = bs->opaque;
  3689. int ret;
  3690. unsigned int cur_bytes; /* number of sectors in current iteration */
  3691. uint64_t host_offset;
  3692. QCowL2Meta *l2meta = NULL;
  3693. assert(!bs->encrypted);
  3694. qemu_co_mutex_lock(&s->lock);
  3695. while (bytes != 0) {
  3696. l2meta = NULL;
  3697. cur_bytes = MIN(bytes, INT_MAX);
  3698. /* TODO:
  3699. * If src->bs == dst->bs, we could simply copy by incrementing
  3700. * the refcnt, without copying user data.
  3701. * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
  3702. ret = qcow2_alloc_host_offset(bs, dst_offset, &cur_bytes,
  3703. &host_offset, &l2meta);
  3704. if (ret < 0) {
  3705. goto fail;
  3706. }
  3707. ret = qcow2_pre_write_overlap_check(bs, 0, host_offset, cur_bytes,
  3708. true);
  3709. if (ret < 0) {
  3710. goto fail;
  3711. }
  3712. qemu_co_mutex_unlock(&s->lock);
  3713. ret = bdrv_co_copy_range_to(src, src_offset, s->data_file, host_offset,
  3714. cur_bytes, read_flags, write_flags);
  3715. qemu_co_mutex_lock(&s->lock);
  3716. if (ret < 0) {
  3717. goto fail;
  3718. }
  3719. ret = qcow2_handle_l2meta(bs, &l2meta, true);
  3720. if (ret) {
  3721. goto fail;
  3722. }
  3723. bytes -= cur_bytes;
  3724. src_offset += cur_bytes;
  3725. dst_offset += cur_bytes;
  3726. }
  3727. ret = 0;
  3728. fail:
  3729. qcow2_handle_l2meta(bs, &l2meta, false);
  3730. qemu_co_mutex_unlock(&s->lock);
  3731. trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
  3732. return ret;
  3733. }
  3734. static int coroutine_fn GRAPH_RDLOCK
  3735. qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
  3736. PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
  3737. {
  3738. ERRP_GUARD();
  3739. BDRVQcow2State *s = bs->opaque;
  3740. uint64_t old_length;
  3741. int64_t new_l1_size;
  3742. int ret;
  3743. QDict *options;
  3744. if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
  3745. prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
  3746. {
  3747. error_setg(errp, "Unsupported preallocation mode '%s'",
  3748. PreallocMode_str(prealloc));
  3749. return -ENOTSUP;
  3750. }
  3751. if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
  3752. error_setg(errp, "The new size must be a multiple of %u",
  3753. (unsigned) BDRV_SECTOR_SIZE);
  3754. return -EINVAL;
  3755. }
  3756. qemu_co_mutex_lock(&s->lock);
  3757. /*
  3758. * Even though we store snapshot size for all images, it was not
  3759. * required until v3, so it is not safe to proceed for v2.
  3760. */
  3761. if (s->nb_snapshots && s->qcow_version < 3) {
  3762. error_setg(errp, "Can't resize a v2 image which has snapshots");
  3763. ret = -ENOTSUP;
  3764. goto fail;
  3765. }
  3766. /* See qcow2-bitmap.c for which bitmap scenarios prevent a resize. */
  3767. if (qcow2_truncate_bitmaps_check(bs, errp)) {
  3768. ret = -ENOTSUP;
  3769. goto fail;
  3770. }
  3771. old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
  3772. new_l1_size = size_to_l1(s, offset);
  3773. if (offset < old_length) {
  3774. int64_t last_cluster, old_file_size;
  3775. if (prealloc != PREALLOC_MODE_OFF) {
  3776. error_setg(errp,
  3777. "Preallocation can't be used for shrinking an image");
  3778. ret = -EINVAL;
  3779. goto fail;
  3780. }
  3781. ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
  3782. old_length - ROUND_UP(offset,
  3783. s->cluster_size),
  3784. QCOW2_DISCARD_ALWAYS, true);
  3785. if (ret < 0) {
  3786. error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
  3787. goto fail;
  3788. }
  3789. ret = qcow2_shrink_l1_table(bs, new_l1_size);
  3790. if (ret < 0) {
  3791. error_setg_errno(errp, -ret,
  3792. "Failed to reduce the number of L2 tables");
  3793. goto fail;
  3794. }
  3795. ret = qcow2_shrink_reftable(bs);
  3796. if (ret < 0) {
  3797. error_setg_errno(errp, -ret,
  3798. "Failed to discard unused refblocks");
  3799. goto fail;
  3800. }
  3801. old_file_size = bdrv_co_getlength(bs->file->bs);
  3802. if (old_file_size < 0) {
  3803. error_setg_errno(errp, -old_file_size,
  3804. "Failed to inquire current file length");
  3805. ret = old_file_size;
  3806. goto fail;
  3807. }
  3808. last_cluster = qcow2_get_last_cluster(bs, old_file_size);
  3809. if (last_cluster < 0) {
  3810. error_setg_errno(errp, -last_cluster,
  3811. "Failed to find the last cluster");
  3812. ret = last_cluster;
  3813. goto fail;
  3814. }
  3815. if ((last_cluster + 1) * s->cluster_size < old_file_size) {
  3816. Error *local_err = NULL;
  3817. /*
  3818. * Do not pass @exact here: It will not help the user if
  3819. * we get an error here just because they wanted to shrink
  3820. * their qcow2 image (on a block device) with qemu-img.
  3821. * (And on the qcow2 layer, the @exact requirement is
  3822. * always fulfilled, so there is no need to pass it on.)
  3823. */
  3824. bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
  3825. false, PREALLOC_MODE_OFF, 0, &local_err);
  3826. if (local_err) {
  3827. warn_reportf_err(local_err,
  3828. "Failed to truncate the tail of the image: ");
  3829. }
  3830. }
  3831. } else {
  3832. ret = qcow2_grow_l1_table(bs, new_l1_size, true);
  3833. if (ret < 0) {
  3834. error_setg_errno(errp, -ret, "Failed to grow the L1 table");
  3835. goto fail;
  3836. }
  3837. if (data_file_is_raw(bs) && prealloc == PREALLOC_MODE_OFF) {
  3838. /*
  3839. * When creating a qcow2 image with data-file-raw, we enforce
  3840. * at least prealloc=metadata, so that the L1/L2 tables are
  3841. * fully allocated and reading from the data file will return
  3842. * the same data as reading from the qcow2 image. When the
  3843. * image is grown, we must consequently preallocate the
  3844. * metadata structures to cover the added area.
  3845. */
  3846. prealloc = PREALLOC_MODE_METADATA;
  3847. }
  3848. }
  3849. switch (prealloc) {
  3850. case PREALLOC_MODE_OFF:
  3851. if (has_data_file(bs)) {
  3852. /*
  3853. * If the caller wants an exact resize, the external data
  3854. * file should be resized to the exact target size, too,
  3855. * so we pass @exact here.
  3856. */
  3857. ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
  3858. errp);
  3859. if (ret < 0) {
  3860. goto fail;
  3861. }
  3862. }
  3863. break;
  3864. case PREALLOC_MODE_METADATA:
  3865. ret = preallocate_co(bs, old_length, offset, prealloc, errp);
  3866. if (ret < 0) {
  3867. goto fail;
  3868. }
  3869. break;
  3870. case PREALLOC_MODE_FALLOC:
  3871. case PREALLOC_MODE_FULL:
  3872. {
  3873. int64_t allocation_start, host_offset, guest_offset;
  3874. int64_t clusters_allocated;
  3875. int64_t old_file_size, last_cluster, new_file_size;
  3876. uint64_t nb_new_data_clusters, nb_new_l2_tables;
  3877. bool subclusters_need_allocation = false;
  3878. /* With a data file, preallocation means just allocating the metadata
  3879. * and forwarding the truncate request to the data file */
  3880. if (has_data_file(bs)) {
  3881. ret = preallocate_co(bs, old_length, offset, prealloc, errp);
  3882. if (ret < 0) {
  3883. goto fail;
  3884. }
  3885. break;
  3886. }
  3887. old_file_size = bdrv_co_getlength(bs->file->bs);
  3888. if (old_file_size < 0) {
  3889. error_setg_errno(errp, -old_file_size,
  3890. "Failed to inquire current file length");
  3891. ret = old_file_size;
  3892. goto fail;
  3893. }
  3894. last_cluster = qcow2_get_last_cluster(bs, old_file_size);
  3895. if (last_cluster >= 0) {
  3896. old_file_size = (last_cluster + 1) * s->cluster_size;
  3897. } else {
  3898. old_file_size = ROUND_UP(old_file_size, s->cluster_size);
  3899. }
  3900. nb_new_data_clusters = (ROUND_UP(offset, s->cluster_size) -
  3901. start_of_cluster(s, old_length)) >> s->cluster_bits;
  3902. /* This is an overestimation; we will not actually allocate space for
  3903. * these in the file but just make sure the new refcount structures are
  3904. * able to cover them so we will not have to allocate new refblocks
  3905. * while entering the data blocks in the potentially new L2 tables.
  3906. * (We do not actually care where the L2 tables are placed. Maybe they
  3907. * are already allocated or they can be placed somewhere before
  3908. * @old_file_size. It does not matter because they will be fully
  3909. * allocated automatically, so they do not need to be covered by the
  3910. * preallocation. All that matters is that we will not have to allocate
  3911. * new refcount structures for them.) */
  3912. nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
  3913. s->cluster_size / l2_entry_size(s));
  3914. /* The cluster range may not be aligned to L2 boundaries, so add one L2
  3915. * table for a potential head/tail */
  3916. nb_new_l2_tables++;
  3917. allocation_start = qcow2_refcount_area(bs, old_file_size,
  3918. nb_new_data_clusters +
  3919. nb_new_l2_tables,
  3920. true, 0, 0);
  3921. if (allocation_start < 0) {
  3922. error_setg_errno(errp, -allocation_start,
  3923. "Failed to resize refcount structures");
  3924. ret = allocation_start;
  3925. goto fail;
  3926. }
  3927. clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
  3928. nb_new_data_clusters);
  3929. if (clusters_allocated < 0) {
  3930. error_setg_errno(errp, -clusters_allocated,
  3931. "Failed to allocate data clusters");
  3932. ret = clusters_allocated;
  3933. goto fail;
  3934. }
  3935. assert(clusters_allocated == nb_new_data_clusters);
  3936. /* Allocate the data area */
  3937. new_file_size = allocation_start +
  3938. nb_new_data_clusters * s->cluster_size;
  3939. /*
  3940. * Image file grows, so @exact does not matter.
  3941. *
  3942. * If we need to zero out the new area, try first whether the protocol
  3943. * driver can already take care of this.
  3944. */
  3945. if (flags & BDRV_REQ_ZERO_WRITE) {
  3946. ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
  3947. BDRV_REQ_ZERO_WRITE, NULL);
  3948. if (ret >= 0) {
  3949. flags &= ~BDRV_REQ_ZERO_WRITE;
  3950. /* Ensure that we read zeroes and not backing file data */
  3951. subclusters_need_allocation = true;
  3952. }
  3953. } else {
  3954. ret = -1;
  3955. }
  3956. if (ret < 0) {
  3957. ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
  3958. errp);
  3959. }
  3960. if (ret < 0) {
  3961. error_prepend(errp, "Failed to resize underlying file: ");
  3962. qcow2_free_clusters(bs, allocation_start,
  3963. nb_new_data_clusters * s->cluster_size,
  3964. QCOW2_DISCARD_OTHER);
  3965. goto fail;
  3966. }
  3967. /* Create the necessary L2 entries */
  3968. host_offset = allocation_start;
  3969. guest_offset = old_length;
  3970. while (nb_new_data_clusters) {
  3971. int64_t nb_clusters = MIN(
  3972. nb_new_data_clusters,
  3973. s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
  3974. unsigned cow_start_length = offset_into_cluster(s, guest_offset);
  3975. QCowL2Meta allocation;
  3976. guest_offset = start_of_cluster(s, guest_offset);
  3977. allocation = (QCowL2Meta) {
  3978. .offset = guest_offset,
  3979. .alloc_offset = host_offset,
  3980. .nb_clusters = nb_clusters,
  3981. .cow_start = {
  3982. .offset = 0,
  3983. .nb_bytes = cow_start_length,
  3984. },
  3985. .cow_end = {
  3986. .offset = nb_clusters << s->cluster_bits,
  3987. .nb_bytes = 0,
  3988. },
  3989. .prealloc = !subclusters_need_allocation,
  3990. };
  3991. qemu_co_queue_init(&allocation.dependent_requests);
  3992. ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
  3993. if (ret < 0) {
  3994. error_setg_errno(errp, -ret, "Failed to update L2 tables");
  3995. qcow2_free_clusters(bs, host_offset,
  3996. nb_new_data_clusters * s->cluster_size,
  3997. QCOW2_DISCARD_OTHER);
  3998. goto fail;
  3999. }
  4000. guest_offset += nb_clusters * s->cluster_size;
  4001. host_offset += nb_clusters * s->cluster_size;
  4002. nb_new_data_clusters -= nb_clusters;
  4003. }
  4004. break;
  4005. }
  4006. default:
  4007. g_assert_not_reached();
  4008. }
  4009. if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
  4010. uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->subcluster_size);
  4011. /*
  4012. * Use zero clusters as much as we can. qcow2_subcluster_zeroize()
  4013. * requires a subcluster-aligned start. The end may be unaligned if
  4014. * it is at the end of the image (which it is here).
  4015. */
  4016. if (offset > zero_start) {
  4017. ret = qcow2_subcluster_zeroize(bs, zero_start, offset - zero_start,
  4018. 0);
  4019. if (ret < 0) {
  4020. error_setg_errno(errp, -ret, "Failed to zero out new clusters");
  4021. goto fail;
  4022. }
  4023. }
  4024. /* Write explicit zeros for the unaligned head */
  4025. if (zero_start > old_length) {
  4026. uint64_t len = MIN(zero_start, offset) - old_length;
  4027. uint8_t *buf = qemu_blockalign0(bs, len);
  4028. QEMUIOVector qiov;
  4029. qemu_iovec_init_buf(&qiov, buf, len);
  4030. qemu_co_mutex_unlock(&s->lock);
  4031. ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
  4032. qemu_co_mutex_lock(&s->lock);
  4033. qemu_vfree(buf);
  4034. if (ret < 0) {
  4035. error_setg_errno(errp, -ret, "Failed to zero out the new area");
  4036. goto fail;
  4037. }
  4038. }
  4039. }
  4040. if (prealloc != PREALLOC_MODE_OFF) {
  4041. /* Flush metadata before actually changing the image size */
  4042. ret = qcow2_write_caches(bs);
  4043. if (ret < 0) {
  4044. error_setg_errno(errp, -ret,
  4045. "Failed to flush the preallocated area to disk");
  4046. goto fail;
  4047. }
  4048. }
  4049. bs->total_sectors = offset / BDRV_SECTOR_SIZE;
  4050. /* write updated header.size */
  4051. offset = cpu_to_be64(offset);
  4052. ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, size),
  4053. sizeof(offset), &offset, 0);
  4054. if (ret < 0) {
  4055. error_setg_errno(errp, -ret, "Failed to update the image size");
  4056. goto fail;
  4057. }
  4058. s->l1_vm_state_index = new_l1_size;
  4059. /* Update cache sizes */
  4060. options = qdict_clone_shallow(bs->options);
  4061. ret = qcow2_update_options(bs, options, s->flags, errp);
  4062. qobject_unref(options);
  4063. if (ret < 0) {
  4064. goto fail;
  4065. }
  4066. ret = 0;
  4067. fail:
  4068. qemu_co_mutex_unlock(&s->lock);
  4069. return ret;
  4070. }
  4071. static int coroutine_fn GRAPH_RDLOCK
  4072. qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
  4073. uint64_t offset, uint64_t bytes,
  4074. QEMUIOVector *qiov, size_t qiov_offset)
  4075. {
  4076. BDRVQcow2State *s = bs->opaque;
  4077. int ret;
  4078. ssize_t out_len;
  4079. uint8_t *buf, *out_buf;
  4080. uint64_t cluster_offset;
  4081. assert(bytes == s->cluster_size || (bytes < s->cluster_size &&
  4082. (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS)));
  4083. buf = qemu_blockalign(bs, s->cluster_size);
  4084. if (bytes < s->cluster_size) {
  4085. /* Zero-pad last write if image size is not cluster aligned */
  4086. memset(buf + bytes, 0, s->cluster_size - bytes);
  4087. }
  4088. qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
  4089. out_buf = g_malloc(s->cluster_size);
  4090. out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
  4091. buf, s->cluster_size);
  4092. if (out_len == -ENOMEM) {
  4093. /* could not compress: write normal cluster */
  4094. ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
  4095. if (ret < 0) {
  4096. goto fail;
  4097. }
  4098. goto success;
  4099. } else if (out_len < 0) {
  4100. ret = -EINVAL;
  4101. goto fail;
  4102. }
  4103. qemu_co_mutex_lock(&s->lock);
  4104. ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
  4105. &cluster_offset);
  4106. if (ret < 0) {
  4107. qemu_co_mutex_unlock(&s->lock);
  4108. goto fail;
  4109. }
  4110. ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
  4111. qemu_co_mutex_unlock(&s->lock);
  4112. if (ret < 0) {
  4113. goto fail;
  4114. }
  4115. BLKDBG_CO_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
  4116. ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
  4117. if (ret < 0) {
  4118. goto fail;
  4119. }
  4120. success:
  4121. ret = 0;
  4122. fail:
  4123. qemu_vfree(buf);
  4124. g_free(out_buf);
  4125. return ret;
  4126. }
  4127. /*
  4128. * This function can count as GRAPH_RDLOCK because
  4129. * qcow2_co_pwritev_compressed_part() holds the graph lock and keeps it until
  4130. * this coroutine has terminated.
  4131. */
  4132. static int coroutine_fn GRAPH_RDLOCK
  4133. qcow2_co_pwritev_compressed_task_entry(AioTask *task)
  4134. {
  4135. Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
  4136. assert(!t->subcluster_type && !t->l2meta);
  4137. return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov,
  4138. t->qiov_offset);
  4139. }
  4140. /*
  4141. * XXX: put compressed sectors first, then all the cluster aligned
  4142. * tables to avoid losing bytes in alignment
  4143. */
  4144. static int coroutine_fn GRAPH_RDLOCK
  4145. qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
  4146. int64_t offset, int64_t bytes,
  4147. QEMUIOVector *qiov, size_t qiov_offset)
  4148. {
  4149. BDRVQcow2State *s = bs->opaque;
  4150. AioTaskPool *aio = NULL;
  4151. int ret = 0;
  4152. if (has_data_file(bs)) {
  4153. return -ENOTSUP;
  4154. }
  4155. if (bytes == 0) {
  4156. /*
  4157. * align end of file to a sector boundary to ease reading with
  4158. * sector based I/Os
  4159. */
  4160. int64_t len = bdrv_co_getlength(bs->file->bs);
  4161. if (len < 0) {
  4162. return len;
  4163. }
  4164. return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
  4165. NULL);
  4166. }
  4167. if (offset_into_cluster(s, offset)) {
  4168. return -EINVAL;
  4169. }
  4170. if (offset_into_cluster(s, bytes) &&
  4171. (offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) {
  4172. return -EINVAL;
  4173. }
  4174. while (bytes && aio_task_pool_status(aio) == 0) {
  4175. uint64_t chunk_size = MIN(bytes, s->cluster_size);
  4176. if (!aio && chunk_size != bytes) {
  4177. aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
  4178. }
  4179. ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
  4180. 0, 0, offset, chunk_size, qiov, qiov_offset, NULL);
  4181. if (ret < 0) {
  4182. break;
  4183. }
  4184. qiov_offset += chunk_size;
  4185. offset += chunk_size;
  4186. bytes -= chunk_size;
  4187. }
  4188. if (aio) {
  4189. aio_task_pool_wait_all(aio);
  4190. if (ret == 0) {
  4191. ret = aio_task_pool_status(aio);
  4192. }
  4193. g_free(aio);
  4194. }
  4195. return ret;
  4196. }
  4197. static int coroutine_fn GRAPH_RDLOCK
  4198. qcow2_co_preadv_compressed(BlockDriverState *bs,
  4199. uint64_t l2_entry,
  4200. uint64_t offset,
  4201. uint64_t bytes,
  4202. QEMUIOVector *qiov,
  4203. size_t qiov_offset)
  4204. {
  4205. BDRVQcow2State *s = bs->opaque;
  4206. int ret = 0, csize;
  4207. uint64_t coffset;
  4208. uint8_t *buf, *out_buf;
  4209. int offset_in_cluster = offset_into_cluster(s, offset);
  4210. qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
  4211. buf = g_try_malloc(csize);
  4212. if (!buf) {
  4213. return -ENOMEM;
  4214. }
  4215. out_buf = qemu_blockalign(bs, s->cluster_size);
  4216. BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
  4217. ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
  4218. if (ret < 0) {
  4219. goto fail;
  4220. }
  4221. if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
  4222. ret = -EIO;
  4223. goto fail;
  4224. }
  4225. qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
  4226. fail:
  4227. qemu_vfree(out_buf);
  4228. g_free(buf);
  4229. return ret;
  4230. }
  4231. static int GRAPH_RDLOCK make_completely_empty(BlockDriverState *bs)
  4232. {
  4233. BDRVQcow2State *s = bs->opaque;
  4234. Error *local_err = NULL;
  4235. int ret, l1_clusters;
  4236. int64_t offset;
  4237. uint64_t *new_reftable = NULL;
  4238. uint64_t rt_entry, l1_size2;
  4239. struct {
  4240. uint64_t l1_offset;
  4241. uint64_t reftable_offset;
  4242. uint32_t reftable_clusters;
  4243. } QEMU_PACKED l1_ofs_rt_ofs_cls;
  4244. ret = qcow2_cache_empty(bs, s->l2_table_cache);
  4245. if (ret < 0) {
  4246. goto fail;
  4247. }
  4248. ret = qcow2_cache_empty(bs, s->refcount_block_cache);
  4249. if (ret < 0) {
  4250. goto fail;
  4251. }
  4252. /* Refcounts will be broken utterly */
  4253. ret = qcow2_mark_dirty(bs);
  4254. if (ret < 0) {
  4255. goto fail;
  4256. }
  4257. BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
  4258. l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE);
  4259. l1_size2 = (uint64_t)s->l1_size * L1E_SIZE;
  4260. /* After this call, neither the in-memory nor the on-disk refcount
  4261. * information accurately describe the actual references */
  4262. ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
  4263. l1_clusters * s->cluster_size, 0);
  4264. if (ret < 0) {
  4265. goto fail_broken_refcounts;
  4266. }
  4267. memset(s->l1_table, 0, l1_size2);
  4268. BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
  4269. /* Overwrite enough clusters at the beginning of the sectors to place
  4270. * the refcount table, a refcount block and the L1 table in; this may
  4271. * overwrite parts of the existing refcount and L1 table, which is not
  4272. * an issue because the dirty flag is set, complete data loss is in fact
  4273. * desired and partial data loss is consequently fine as well */
  4274. ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
  4275. (2 + l1_clusters) * s->cluster_size, 0);
  4276. /* This call (even if it failed overall) may have overwritten on-disk
  4277. * refcount structures; in that case, the in-memory refcount information
  4278. * will probably differ from the on-disk information which makes the BDS
  4279. * unusable */
  4280. if (ret < 0) {
  4281. goto fail_broken_refcounts;
  4282. }
  4283. BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
  4284. BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
  4285. /* "Create" an empty reftable (one cluster) directly after the image
  4286. * header and an empty L1 table three clusters after the image header;
  4287. * the cluster between those two will be used as the first refblock */
  4288. l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
  4289. l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
  4290. l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
  4291. ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
  4292. sizeof(l1_ofs_rt_ofs_cls), &l1_ofs_rt_ofs_cls, 0);
  4293. if (ret < 0) {
  4294. goto fail_broken_refcounts;
  4295. }
  4296. s->l1_table_offset = 3 * s->cluster_size;
  4297. new_reftable = g_try_new0(uint64_t, s->cluster_size / REFTABLE_ENTRY_SIZE);
  4298. if (!new_reftable) {
  4299. ret = -ENOMEM;
  4300. goto fail_broken_refcounts;
  4301. }
  4302. s->refcount_table_offset = s->cluster_size;
  4303. s->refcount_table_size = s->cluster_size / REFTABLE_ENTRY_SIZE;
  4304. s->max_refcount_table_index = 0;
  4305. g_free(s->refcount_table);
  4306. s->refcount_table = new_reftable;
  4307. new_reftable = NULL;
  4308. /* Now the in-memory refcount information again corresponds to the on-disk
  4309. * information (reftable is empty and no refblocks (the refblock cache is
  4310. * empty)); however, this means some clusters (e.g. the image header) are
  4311. * referenced, but not refcounted, but the normal qcow2 code assumes that
  4312. * the in-memory information is always correct */
  4313. BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
  4314. /* Enter the first refblock into the reftable */
  4315. rt_entry = cpu_to_be64(2 * s->cluster_size);
  4316. ret = bdrv_pwrite_sync(bs->file, s->cluster_size, sizeof(rt_entry),
  4317. &rt_entry, 0);
  4318. if (ret < 0) {
  4319. goto fail_broken_refcounts;
  4320. }
  4321. s->refcount_table[0] = 2 * s->cluster_size;
  4322. s->free_cluster_index = 0;
  4323. assert(3 + l1_clusters <= s->refcount_block_size);
  4324. offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
  4325. if (offset < 0) {
  4326. ret = offset;
  4327. goto fail_broken_refcounts;
  4328. } else if (offset > 0) {
  4329. error_report("First cluster in emptied image is in use");
  4330. abort();
  4331. }
  4332. /* Now finally the in-memory information corresponds to the on-disk
  4333. * structures and is correct */
  4334. ret = qcow2_mark_clean(bs);
  4335. if (ret < 0) {
  4336. goto fail;
  4337. }
  4338. ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
  4339. PREALLOC_MODE_OFF, 0, &local_err);
  4340. if (ret < 0) {
  4341. error_report_err(local_err);
  4342. goto fail;
  4343. }
  4344. return 0;
  4345. fail_broken_refcounts:
  4346. /* The BDS is unusable at this point. If we wanted to make it usable, we
  4347. * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
  4348. * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
  4349. * again. However, because the functions which could have caused this error
  4350. * path to be taken are used by those functions as well, it's very likely
  4351. * that that sequence will fail as well. Therefore, just eject the BDS. */
  4352. bs->drv = NULL;
  4353. fail:
  4354. g_free(new_reftable);
  4355. return ret;
  4356. }
  4357. static int GRAPH_RDLOCK qcow2_make_empty(BlockDriverState *bs)
  4358. {
  4359. BDRVQcow2State *s = bs->opaque;
  4360. uint64_t offset, end_offset;
  4361. int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
  4362. int l1_clusters, ret = 0;
  4363. l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / L1E_SIZE);
  4364. if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
  4365. 3 + l1_clusters <= s->refcount_block_size &&
  4366. s->crypt_method_header != QCOW_CRYPT_LUKS &&
  4367. !has_data_file(bs)) {
  4368. /* The following function only works for qcow2 v3 images (it
  4369. * requires the dirty flag) and only as long as there are no
  4370. * features that reserve extra clusters (such as snapshots,
  4371. * LUKS header, or persistent bitmaps), because it completely
  4372. * empties the image. Furthermore, the L1 table and three
  4373. * additional clusters (image header, refcount table, one
  4374. * refcount block) have to fit inside one refcount block. It
  4375. * only resets the image file, i.e. does not work with an
  4376. * external data file. */
  4377. return make_completely_empty(bs);
  4378. }
  4379. /* This fallback code simply discards every active cluster; this is slow,
  4380. * but works in all cases */
  4381. end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
  4382. for (offset = 0; offset < end_offset; offset += step) {
  4383. /* As this function is generally used after committing an external
  4384. * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
  4385. * default action for this kind of discard is to pass the discard,
  4386. * which will ideally result in an actually smaller image file, as
  4387. * is probably desired. */
  4388. ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
  4389. QCOW2_DISCARD_SNAPSHOT, true);
  4390. if (ret < 0) {
  4391. break;
  4392. }
  4393. }
  4394. return ret;
  4395. }
  4396. static coroutine_fn GRAPH_RDLOCK int qcow2_co_flush_to_os(BlockDriverState *bs)
  4397. {
  4398. BDRVQcow2State *s = bs->opaque;
  4399. int ret;
  4400. qemu_co_mutex_lock(&s->lock);
  4401. ret = qcow2_write_caches(bs);
  4402. qemu_co_mutex_unlock(&s->lock);
  4403. return ret;
  4404. }
  4405. static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
  4406. Error **errp)
  4407. {
  4408. Error *local_err = NULL;
  4409. BlockMeasureInfo *info;
  4410. uint64_t required = 0; /* bytes that contribute to required size */
  4411. uint64_t virtual_size; /* disk size as seen by guest */
  4412. uint64_t refcount_bits;
  4413. uint64_t l2_tables;
  4414. uint64_t luks_payload_size = 0;
  4415. size_t cluster_size;
  4416. int version;
  4417. char *optstr;
  4418. PreallocMode prealloc;
  4419. bool has_backing_file;
  4420. bool has_luks;
  4421. bool extended_l2;
  4422. size_t l2e_size;
  4423. /* Parse image creation options */
  4424. extended_l2 = qemu_opt_get_bool_del(opts, BLOCK_OPT_EXTL2, false);
  4425. cluster_size = qcow2_opt_get_cluster_size_del(opts, extended_l2,
  4426. &local_err);
  4427. if (local_err) {
  4428. goto err;
  4429. }
  4430. version = qcow2_opt_get_version_del(opts, &local_err);
  4431. if (local_err) {
  4432. goto err;
  4433. }
  4434. refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
  4435. if (local_err) {
  4436. goto err;
  4437. }
  4438. optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
  4439. prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
  4440. PREALLOC_MODE_OFF, &local_err);
  4441. g_free(optstr);
  4442. if (local_err) {
  4443. goto err;
  4444. }
  4445. optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
  4446. has_backing_file = !!optstr;
  4447. g_free(optstr);
  4448. optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
  4449. has_luks = optstr && strcmp(optstr, "luks") == 0;
  4450. g_free(optstr);
  4451. if (has_luks) {
  4452. g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
  4453. QDict *cryptoopts = qcow2_extract_crypto_opts(opts, "luks", errp);
  4454. size_t headerlen;
  4455. create_opts = block_crypto_create_opts_init(cryptoopts, errp);
  4456. qobject_unref(cryptoopts);
  4457. if (!create_opts) {
  4458. goto err;
  4459. }
  4460. if (!qcrypto_block_calculate_payload_offset(create_opts,
  4461. "encrypt.",
  4462. &headerlen,
  4463. &local_err)) {
  4464. goto err;
  4465. }
  4466. luks_payload_size = ROUND_UP(headerlen, cluster_size);
  4467. }
  4468. virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
  4469. virtual_size = ROUND_UP(virtual_size, cluster_size);
  4470. /* Check that virtual disk size is valid */
  4471. l2e_size = extended_l2 ? L2E_SIZE_EXTENDED : L2E_SIZE_NORMAL;
  4472. l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
  4473. cluster_size / l2e_size);
  4474. if (l2_tables * L1E_SIZE > QCOW_MAX_L1_SIZE) {
  4475. error_setg(&local_err, "The image size is too large "
  4476. "(try using a larger cluster size)");
  4477. goto err;
  4478. }
  4479. /* Account for input image */
  4480. if (in_bs) {
  4481. int64_t ssize = bdrv_getlength(in_bs);
  4482. if (ssize < 0) {
  4483. error_setg_errno(&local_err, -ssize,
  4484. "Unable to get image virtual_size");
  4485. goto err;
  4486. }
  4487. virtual_size = ROUND_UP(ssize, cluster_size);
  4488. if (has_backing_file) {
  4489. /* We don't know how much of the backing chain is shared by the input
  4490. * image and the new image file. In the worst case the new image's
  4491. * backing file has nothing in common with the input image. Be
  4492. * conservative and assume all clusters need to be written.
  4493. */
  4494. required = virtual_size;
  4495. } else {
  4496. int64_t offset;
  4497. int64_t pnum = 0;
  4498. for (offset = 0; offset < ssize; offset += pnum) {
  4499. int ret;
  4500. ret = bdrv_block_status_above(in_bs, NULL, offset,
  4501. ssize - offset, &pnum, NULL,
  4502. NULL);
  4503. if (ret < 0) {
  4504. error_setg_errno(&local_err, -ret,
  4505. "Unable to get block status");
  4506. goto err;
  4507. }
  4508. if (ret & BDRV_BLOCK_ZERO) {
  4509. /* Skip zero regions (safe with no backing file) */
  4510. } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
  4511. (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
  4512. /* Extend pnum to end of cluster for next iteration */
  4513. pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
  4514. /* Count clusters we've seen */
  4515. required += offset % cluster_size + pnum;
  4516. }
  4517. }
  4518. }
  4519. }
  4520. /* Take into account preallocation. Nothing special is needed for
  4521. * PREALLOC_MODE_METADATA since metadata is always counted.
  4522. */
  4523. if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
  4524. required = virtual_size;
  4525. }
  4526. info = g_new0(BlockMeasureInfo, 1);
  4527. info->fully_allocated = luks_payload_size +
  4528. qcow2_calc_prealloc_size(virtual_size, cluster_size,
  4529. ctz32(refcount_bits), extended_l2);
  4530. /*
  4531. * Remove data clusters that are not required. This overestimates the
  4532. * required size because metadata needed for the fully allocated file is
  4533. * still counted. Show bitmaps only if both source and destination
  4534. * would support them.
  4535. */
  4536. info->required = info->fully_allocated - virtual_size + required;
  4537. info->has_bitmaps = version >= 3 && in_bs &&
  4538. bdrv_supports_persistent_dirty_bitmap(in_bs);
  4539. if (info->has_bitmaps) {
  4540. info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs,
  4541. cluster_size);
  4542. }
  4543. return info;
  4544. err:
  4545. error_propagate(errp, local_err);
  4546. return NULL;
  4547. }
  4548. static int coroutine_fn
  4549. qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
  4550. {
  4551. BDRVQcow2State *s = bs->opaque;
  4552. bdi->cluster_size = s->cluster_size;
  4553. bdi->subcluster_size = s->subcluster_size;
  4554. bdi->vm_state_offset = qcow2_vm_state_offset(s);
  4555. bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
  4556. return 0;
  4557. }
  4558. static ImageInfoSpecific * GRAPH_RDLOCK
  4559. qcow2_get_specific_info(BlockDriverState *bs, Error **errp)
  4560. {
  4561. BDRVQcow2State *s = bs->opaque;
  4562. ImageInfoSpecific *spec_info;
  4563. QCryptoBlockInfo *encrypt_info = NULL;
  4564. if (s->crypto != NULL) {
  4565. encrypt_info = qcrypto_block_get_info(s->crypto, errp);
  4566. if (!encrypt_info) {
  4567. return NULL;
  4568. }
  4569. }
  4570. spec_info = g_new(ImageInfoSpecific, 1);
  4571. *spec_info = (ImageInfoSpecific){
  4572. .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
  4573. .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
  4574. };
  4575. if (s->qcow_version == 2) {
  4576. *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
  4577. .compat = g_strdup("0.10"),
  4578. .refcount_bits = s->refcount_bits,
  4579. };
  4580. } else if (s->qcow_version == 3) {
  4581. Qcow2BitmapInfoList *bitmaps;
  4582. if (!qcow2_get_bitmap_info_list(bs, &bitmaps, errp)) {
  4583. qapi_free_ImageInfoSpecific(spec_info);
  4584. qapi_free_QCryptoBlockInfo(encrypt_info);
  4585. return NULL;
  4586. }
  4587. *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
  4588. .compat = g_strdup("1.1"),
  4589. .lazy_refcounts = s->compatible_features &
  4590. QCOW2_COMPAT_LAZY_REFCOUNTS,
  4591. .has_lazy_refcounts = true,
  4592. .corrupt = s->incompatible_features &
  4593. QCOW2_INCOMPAT_CORRUPT,
  4594. .has_corrupt = true,
  4595. .has_extended_l2 = true,
  4596. .extended_l2 = has_subclusters(s),
  4597. .refcount_bits = s->refcount_bits,
  4598. .has_bitmaps = !!bitmaps,
  4599. .bitmaps = bitmaps,
  4600. .data_file = g_strdup(s->image_data_file),
  4601. .has_data_file_raw = has_data_file(bs),
  4602. .data_file_raw = data_file_is_raw(bs),
  4603. .compression_type = s->compression_type,
  4604. };
  4605. } else {
  4606. /* if this assertion fails, this probably means a new version was
  4607. * added without having it covered here */
  4608. g_assert_not_reached();
  4609. }
  4610. if (encrypt_info) {
  4611. ImageInfoSpecificQCow2Encryption *qencrypt =
  4612. g_new(ImageInfoSpecificQCow2Encryption, 1);
  4613. switch (encrypt_info->format) {
  4614. case QCRYPTO_BLOCK_FORMAT_QCOW:
  4615. qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
  4616. break;
  4617. case QCRYPTO_BLOCK_FORMAT_LUKS:
  4618. qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
  4619. qencrypt->u.luks = encrypt_info->u.luks;
  4620. break;
  4621. default:
  4622. abort();
  4623. }
  4624. /* Since we did shallow copy above, erase any pointers
  4625. * in the original info */
  4626. memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
  4627. qapi_free_QCryptoBlockInfo(encrypt_info);
  4628. spec_info->u.qcow2.data->encrypt = qencrypt;
  4629. }
  4630. return spec_info;
  4631. }
  4632. static int coroutine_mixed_fn GRAPH_RDLOCK
  4633. qcow2_has_zero_init(BlockDriverState *bs)
  4634. {
  4635. BDRVQcow2State *s = bs->opaque;
  4636. bool preallocated;
  4637. if (qemu_in_coroutine()) {
  4638. qemu_co_mutex_lock(&s->lock);
  4639. }
  4640. /*
  4641. * Check preallocation status: Preallocated images have all L2
  4642. * tables allocated, nonpreallocated images have none. It is
  4643. * therefore enough to check the first one.
  4644. */
  4645. preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
  4646. if (qemu_in_coroutine()) {
  4647. qemu_co_mutex_unlock(&s->lock);
  4648. }
  4649. if (!preallocated) {
  4650. return 1;
  4651. } else if (bs->encrypted) {
  4652. return 0;
  4653. } else {
  4654. return bdrv_has_zero_init(s->data_file->bs);
  4655. }
  4656. }
  4657. /*
  4658. * Check the request to vmstate. On success return
  4659. * qcow2_vm_state_offset(bs) + @pos
  4660. */
  4661. static int64_t qcow2_check_vmstate_request(BlockDriverState *bs,
  4662. QEMUIOVector *qiov, int64_t pos)
  4663. {
  4664. BDRVQcow2State *s = bs->opaque;
  4665. int64_t vmstate_offset = qcow2_vm_state_offset(s);
  4666. int ret;
  4667. /* Incoming requests must be OK */
  4668. bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort);
  4669. if (INT64_MAX - pos < vmstate_offset) {
  4670. return -EIO;
  4671. }
  4672. pos += vmstate_offset;
  4673. ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
  4674. if (ret < 0) {
  4675. return ret;
  4676. }
  4677. return pos;
  4678. }
  4679. static int coroutine_fn GRAPH_RDLOCK
  4680. qcow2_co_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
  4681. {
  4682. int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
  4683. if (offset < 0) {
  4684. return offset;
  4685. }
  4686. BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
  4687. return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
  4688. }
  4689. static int coroutine_fn GRAPH_RDLOCK
  4690. qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
  4691. {
  4692. int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos);
  4693. if (offset < 0) {
  4694. return offset;
  4695. }
  4696. BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
  4697. return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0);
  4698. }
  4699. static int GRAPH_RDLOCK qcow2_has_compressed_clusters(BlockDriverState *bs)
  4700. {
  4701. int64_t offset = 0;
  4702. int64_t bytes = bdrv_getlength(bs);
  4703. if (bytes < 0) {
  4704. return bytes;
  4705. }
  4706. while (bytes != 0) {
  4707. int ret;
  4708. QCow2SubclusterType type;
  4709. unsigned int cur_bytes = MIN(INT_MAX, bytes);
  4710. uint64_t host_offset;
  4711. ret = qcow2_get_host_offset(bs, offset, &cur_bytes, &host_offset,
  4712. &type);
  4713. if (ret < 0) {
  4714. return ret;
  4715. }
  4716. if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
  4717. return 1;
  4718. }
  4719. offset += cur_bytes;
  4720. bytes -= cur_bytes;
  4721. }
  4722. return 0;
  4723. }
  4724. /*
  4725. * Downgrades an image's version. To achieve this, any incompatible features
  4726. * have to be removed.
  4727. */
  4728. static int GRAPH_RDLOCK
  4729. qcow2_downgrade(BlockDriverState *bs, int target_version,
  4730. BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
  4731. Error **errp)
  4732. {
  4733. BDRVQcow2State *s = bs->opaque;
  4734. int current_version = s->qcow_version;
  4735. int ret;
  4736. int i;
  4737. /* This is qcow2_downgrade(), not qcow2_upgrade() */
  4738. assert(target_version < current_version);
  4739. /* There are no other versions (now) that you can downgrade to */
  4740. assert(target_version == 2);
  4741. if (s->refcount_order != 4) {
  4742. error_setg(errp, "compat=0.10 requires refcount_bits=16");
  4743. return -ENOTSUP;
  4744. }
  4745. if (has_data_file(bs)) {
  4746. error_setg(errp, "Cannot downgrade an image with a data file");
  4747. return -ENOTSUP;
  4748. }
  4749. /*
  4750. * If any internal snapshot has a different size than the current
  4751. * image size, or VM state size that exceeds 32 bits, downgrading
  4752. * is unsafe. Even though we would still use v3-compliant output
  4753. * to preserve that data, other v2 programs might not realize
  4754. * those optional fields are important.
  4755. */
  4756. for (i = 0; i < s->nb_snapshots; i++) {
  4757. if (s->snapshots[i].vm_state_size > UINT32_MAX ||
  4758. s->snapshots[i].disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
  4759. error_setg(errp, "Internal snapshots prevent downgrade of image");
  4760. return -ENOTSUP;
  4761. }
  4762. }
  4763. /* clear incompatible features */
  4764. if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
  4765. ret = qcow2_mark_clean(bs);
  4766. if (ret < 0) {
  4767. error_setg_errno(errp, -ret, "Failed to make the image clean");
  4768. return ret;
  4769. }
  4770. }
  4771. /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
  4772. * the first place; if that happens nonetheless, returning -ENOTSUP is the
  4773. * best thing to do anyway */
  4774. if (s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION) {
  4775. error_setg(errp, "Cannot downgrade an image with incompatible features "
  4776. "0x%" PRIx64 " set",
  4777. s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION);
  4778. return -ENOTSUP;
  4779. }
  4780. /* since we can ignore compatible features, we can set them to 0 as well */
  4781. s->compatible_features = 0;
  4782. /* if lazy refcounts have been used, they have already been fixed through
  4783. * clearing the dirty flag */
  4784. /* clearing autoclear features is trivial */
  4785. s->autoclear_features = 0;
  4786. ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
  4787. if (ret < 0) {
  4788. error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
  4789. return ret;
  4790. }
  4791. if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) {
  4792. ret = qcow2_has_compressed_clusters(bs);
  4793. if (ret < 0) {
  4794. error_setg(errp, "Failed to check block status");
  4795. return -EINVAL;
  4796. }
  4797. if (ret) {
  4798. error_setg(errp, "Cannot downgrade an image with zstd compression "
  4799. "type and existing compressed clusters");
  4800. return -ENOTSUP;
  4801. }
  4802. /*
  4803. * No compressed clusters for now, so just chose default zlib
  4804. * compression.
  4805. */
  4806. s->incompatible_features &= ~QCOW2_INCOMPAT_COMPRESSION;
  4807. s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
  4808. }
  4809. assert(s->incompatible_features == 0);
  4810. s->qcow_version = target_version;
  4811. ret = qcow2_update_header(bs);
  4812. if (ret < 0) {
  4813. s->qcow_version = current_version;
  4814. error_setg_errno(errp, -ret, "Failed to update the image header");
  4815. return ret;
  4816. }
  4817. return 0;
  4818. }
  4819. /*
  4820. * Upgrades an image's version. While newer versions encompass all
  4821. * features of older versions, some things may have to be presented
  4822. * differently.
  4823. */
  4824. static int GRAPH_RDLOCK
  4825. qcow2_upgrade(BlockDriverState *bs, int target_version,
  4826. BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
  4827. Error **errp)
  4828. {
  4829. BDRVQcow2State *s = bs->opaque;
  4830. bool need_snapshot_update;
  4831. int current_version = s->qcow_version;
  4832. int i;
  4833. int ret;
  4834. /* This is qcow2_upgrade(), not qcow2_downgrade() */
  4835. assert(target_version > current_version);
  4836. /* There are no other versions (yet) that you can upgrade to */
  4837. assert(target_version == 3);
  4838. status_cb(bs, 0, 2, cb_opaque);
  4839. /*
  4840. * In v2, snapshots do not need to have extra data. v3 requires
  4841. * the 64-bit VM state size and the virtual disk size to be
  4842. * present.
  4843. * qcow2_write_snapshots() will always write the list in the
  4844. * v3-compliant format.
  4845. */
  4846. need_snapshot_update = false;
  4847. for (i = 0; i < s->nb_snapshots; i++) {
  4848. if (s->snapshots[i].extra_data_size <
  4849. sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
  4850. sizeof_field(QCowSnapshotExtraData, disk_size))
  4851. {
  4852. need_snapshot_update = true;
  4853. break;
  4854. }
  4855. }
  4856. if (need_snapshot_update) {
  4857. ret = qcow2_write_snapshots(bs);
  4858. if (ret < 0) {
  4859. error_setg_errno(errp, -ret, "Failed to update the snapshot table");
  4860. return ret;
  4861. }
  4862. }
  4863. status_cb(bs, 1, 2, cb_opaque);
  4864. s->qcow_version = target_version;
  4865. ret = qcow2_update_header(bs);
  4866. if (ret < 0) {
  4867. s->qcow_version = current_version;
  4868. error_setg_errno(errp, -ret, "Failed to update the image header");
  4869. return ret;
  4870. }
  4871. status_cb(bs, 2, 2, cb_opaque);
  4872. return 0;
  4873. }
/*
 * Identifies which step of an amend run is currently reporting progress.
 * qcow2_amend_options() stores the value in
 * Qcow2AmendHelperCBInfo::current_operation before starting a step, and
 * qcow2_amend_helper_cb() compares it against the previously seen value to
 * notice when one step has finished and the next one has begun.
 */
typedef enum Qcow2AmendOperation {
    /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
     * statically initialized to so that the helper CB can discern the first
     * invocation from an operation change */
    QCOW2_NO_OPERATION = 0,

    QCOW2_UPGRADING,
    QCOW2_UPDATING_ENCRYPTION,
    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
/*
 * Bookkeeping for qcow2_amend_helper_cb(), which merges the progress
 * reports of several consecutive amend operations into a single progress
 * report for the caller's original status callback.
 */
typedef struct Qcow2AmendHelperCBInfo {
    /* The code coordinating the amend operations should only modify
     * these four fields; the rest will be managed by the CB */

    /* Callback (and its opaque value) that receives the merged progress */
    BlockDriverAmendStatusCB *original_status_cb;
    void *original_cb_opaque;

    /* Operation that is about to run or is currently running */
    Qcow2AmendOperation current_operation;

    /* Total number of operations to perform (only set once) */
    int total_operations;

    /* The following fields are managed by the CB */

    /* Number of operations completed */
    int operations_completed;

    /* Cumulative offset of all completed operations */
    int64_t offset_completed;

    /* Operation seen by the previous CB invocation */
    Qcow2AmendOperation last_operation;
    /* Work size reported to the previous CB invocation */
    int64_t last_work_size;
} Qcow2AmendHelperCBInfo;
  4900. static void qcow2_amend_helper_cb(BlockDriverState *bs,
  4901. int64_t operation_offset,
  4902. int64_t operation_work_size, void *opaque)
  4903. {
  4904. Qcow2AmendHelperCBInfo *info = opaque;
  4905. int64_t current_work_size;
  4906. int64_t projected_work_size;
  4907. if (info->current_operation != info->last_operation) {
  4908. if (info->last_operation != QCOW2_NO_OPERATION) {
  4909. info->offset_completed += info->last_work_size;
  4910. info->operations_completed++;
  4911. }
  4912. info->last_operation = info->current_operation;
  4913. }
  4914. assert(info->total_operations > 0);
  4915. assert(info->operations_completed < info->total_operations);
  4916. info->last_work_size = operation_work_size;
  4917. current_work_size = info->offset_completed + operation_work_size;
  4918. /* current_work_size is the total work size for (operations_completed + 1)
  4919. * operations (which includes this one), so multiply it by the number of
  4920. * operations not covered and divide it by the number of operations
  4921. * covered to get a projection for the operations not covered */
  4922. projected_work_size = current_work_size * (info->total_operations -
  4923. info->operations_completed - 1)
  4924. / (info->operations_completed + 1);
  4925. info->original_status_cb(bs, info->offset_completed + operation_offset,
  4926. current_work_size + projected_work_size,
  4927. info->original_cb_opaque);
  4928. }
  4929. static int GRAPH_RDLOCK
  4930. qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
  4931. BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
  4932. bool force, Error **errp)
  4933. {
  4934. BDRVQcow2State *s = bs->opaque;
  4935. int old_version = s->qcow_version, new_version = old_version;
  4936. uint64_t new_size = 0;
  4937. const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
  4938. bool lazy_refcounts = s->use_lazy_refcounts;
  4939. bool data_file_raw = data_file_is_raw(bs);
  4940. const char *compat = NULL;
  4941. int refcount_bits = s->refcount_bits;
  4942. int ret;
  4943. QemuOptDesc *desc = opts->list->desc;
  4944. Qcow2AmendHelperCBInfo helper_cb_info;
  4945. bool encryption_update = false;
  4946. while (desc && desc->name) {
  4947. if (!qemu_opt_find(opts, desc->name)) {
  4948. /* only change explicitly defined options */
  4949. desc++;
  4950. continue;
  4951. }
  4952. if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
  4953. compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
  4954. if (!compat) {
  4955. /* preserve default */
  4956. } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
  4957. new_version = 2;
  4958. } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
  4959. new_version = 3;
  4960. } else {
  4961. error_setg(errp, "Unknown compatibility level %s", compat);
  4962. return -EINVAL;
  4963. }
  4964. } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
  4965. new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
  4966. } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
  4967. backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
  4968. } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
  4969. backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
  4970. } else if (g_str_has_prefix(desc->name, "encrypt.")) {
  4971. if (!s->crypto) {
  4972. error_setg(errp,
  4973. "Can't amend encryption options - encryption not present");
  4974. return -EINVAL;
  4975. }
  4976. if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
  4977. error_setg(errp,
  4978. "Only LUKS encryption options can be amended");
  4979. return -ENOTSUP;
  4980. }
  4981. encryption_update = true;
  4982. } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
  4983. lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
  4984. lazy_refcounts);
  4985. } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
  4986. refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
  4987. refcount_bits);
  4988. if (refcount_bits <= 0 || refcount_bits > 64 ||
  4989. !is_power_of_2(refcount_bits))
  4990. {
  4991. error_setg(errp, "Refcount width must be a power of two and "
  4992. "may not exceed 64 bits");
  4993. return -EINVAL;
  4994. }
  4995. } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
  4996. data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
  4997. if (data_file && !has_data_file(bs)) {
  4998. error_setg(errp, "data-file can only be set for images that "
  4999. "use an external data file");
  5000. return -EINVAL;
  5001. }
  5002. } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
  5003. data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
  5004. data_file_raw);
  5005. if (data_file_raw && !data_file_is_raw(bs)) {
  5006. error_setg(errp, "data-file-raw cannot be set on existing "
  5007. "images");
  5008. return -EINVAL;
  5009. }
  5010. } else {
  5011. /* if this point is reached, this probably means a new option was
  5012. * added without having it covered here */
  5013. abort();
  5014. }
  5015. desc++;
  5016. }
  5017. helper_cb_info = (Qcow2AmendHelperCBInfo){
  5018. .original_status_cb = status_cb,
  5019. .original_cb_opaque = cb_opaque,
  5020. .total_operations = (new_version != old_version)
  5021. + (s->refcount_bits != refcount_bits) +
  5022. (encryption_update == true)
  5023. };
  5024. /* Upgrade first (some features may require compat=1.1) */
  5025. if (new_version > old_version) {
  5026. helper_cb_info.current_operation = QCOW2_UPGRADING;
  5027. ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
  5028. &helper_cb_info, errp);
  5029. if (ret < 0) {
  5030. return ret;
  5031. }
  5032. }
  5033. if (encryption_update) {
  5034. QDict *amend_opts_dict;
  5035. QCryptoBlockAmendOptions *amend_opts;
  5036. helper_cb_info.current_operation = QCOW2_UPDATING_ENCRYPTION;
  5037. amend_opts_dict = qcow2_extract_crypto_opts(opts, "luks", errp);
  5038. if (!amend_opts_dict) {
  5039. return -EINVAL;
  5040. }
  5041. amend_opts = block_crypto_amend_opts_init(amend_opts_dict, errp);
  5042. qobject_unref(amend_opts_dict);
  5043. if (!amend_opts) {
  5044. return -EINVAL;
  5045. }
  5046. ret = qcrypto_block_amend_options(s->crypto,
  5047. qcow2_crypto_hdr_read_func,
  5048. qcow2_crypto_hdr_write_func,
  5049. bs,
  5050. amend_opts,
  5051. force,
  5052. errp);
  5053. qapi_free_QCryptoBlockAmendOptions(amend_opts);
  5054. if (ret < 0) {
  5055. return ret;
  5056. }
  5057. }
  5058. if (s->refcount_bits != refcount_bits) {
  5059. int refcount_order = ctz32(refcount_bits);
  5060. if (new_version < 3 && refcount_bits != 16) {
  5061. error_setg(errp, "Refcount widths other than 16 bits require "
  5062. "compatibility level 1.1 or above (use compat=1.1 or "
  5063. "greater)");
  5064. return -EINVAL;
  5065. }
  5066. helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
  5067. ret = qcow2_change_refcount_order(bs, refcount_order,
  5068. &qcow2_amend_helper_cb,
  5069. &helper_cb_info, errp);
  5070. if (ret < 0) {
  5071. return ret;
  5072. }
  5073. }
  5074. /* data-file-raw blocks backing files, so clear it first if requested */
  5075. if (data_file_raw) {
  5076. s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
  5077. } else {
  5078. s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
  5079. }
  5080. if (data_file) {
  5081. g_free(s->image_data_file);
  5082. s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
  5083. }
  5084. ret = qcow2_update_header(bs);
  5085. if (ret < 0) {
  5086. error_setg_errno(errp, -ret, "Failed to update the image header");
  5087. return ret;
  5088. }
  5089. if (backing_file || backing_format) {
  5090. if (g_strcmp0(backing_file, s->image_backing_file) ||
  5091. g_strcmp0(backing_format, s->image_backing_format)) {
  5092. error_setg(errp, "Cannot amend the backing file");
  5093. error_append_hint(errp,
  5094. "You can use 'qemu-img rebase' instead.\n");
  5095. return -EINVAL;
  5096. }
  5097. }
  5098. if (s->use_lazy_refcounts != lazy_refcounts) {
  5099. if (lazy_refcounts) {
  5100. if (new_version < 3) {
  5101. error_setg(errp, "Lazy refcounts only supported with "
  5102. "compatibility level 1.1 and above (use compat=1.1 "
  5103. "or greater)");
  5104. return -EINVAL;
  5105. }
  5106. s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
  5107. ret = qcow2_update_header(bs);
  5108. if (ret < 0) {
  5109. s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
  5110. error_setg_errno(errp, -ret, "Failed to update the image header");
  5111. return ret;
  5112. }
  5113. s->use_lazy_refcounts = true;
  5114. } else {
  5115. /* make image clean first */
  5116. ret = qcow2_mark_clean(bs);
  5117. if (ret < 0) {
  5118. error_setg_errno(errp, -ret, "Failed to make the image clean");
  5119. return ret;
  5120. }
  5121. /* now disallow lazy refcounts */
  5122. s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
  5123. ret = qcow2_update_header(bs);
  5124. if (ret < 0) {
  5125. s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
  5126. error_setg_errno(errp, -ret, "Failed to update the image header");
  5127. return ret;
  5128. }
  5129. s->use_lazy_refcounts = false;
  5130. }
  5131. }
  5132. if (new_size) {
  5133. BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
  5134. errp);
  5135. if (!blk) {
  5136. return -EPERM;
  5137. }
  5138. /*
  5139. * Amending image options should ensure that the image has
  5140. * exactly the given new values, so pass exact=true here.
  5141. */
  5142. ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
  5143. blk_unref(blk);
  5144. if (ret < 0) {
  5145. return ret;
  5146. }
  5147. }
  5148. /* Downgrade last (so unsupported features can be removed before) */
  5149. if (new_version < old_version) {
  5150. helper_cb_info.current_operation = QCOW2_DOWNGRADING;
  5151. ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
  5152. &helper_cb_info, errp);
  5153. if (ret < 0) {
  5154. return ret;
  5155. }
  5156. }
  5157. return 0;
  5158. }
  5159. static int coroutine_fn qcow2_co_amend(BlockDriverState *bs,
  5160. BlockdevAmendOptions *opts,
  5161. bool force,
  5162. Error **errp)
  5163. {
  5164. BlockdevAmendOptionsQcow2 *qopts = &opts->u.qcow2;
  5165. BDRVQcow2State *s = bs->opaque;
  5166. int ret = 0;
  5167. if (qopts->encrypt) {
  5168. if (!s->crypto) {
  5169. error_setg(errp, "image is not encrypted, can't amend");
  5170. return -EOPNOTSUPP;
  5171. }
  5172. if (qopts->encrypt->format != QCRYPTO_BLOCK_FORMAT_LUKS) {
  5173. error_setg(errp,
  5174. "Amend can't be used to change the qcow2 encryption format");
  5175. return -EOPNOTSUPP;
  5176. }
  5177. if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
  5178. error_setg(errp,
  5179. "Only LUKS encryption options can be amended for qcow2 with blockdev-amend");
  5180. return -EOPNOTSUPP;
  5181. }
  5182. ret = qcrypto_block_amend_options(s->crypto,
  5183. qcow2_crypto_hdr_read_func,
  5184. qcow2_crypto_hdr_write_func,
  5185. bs,
  5186. qopts->encrypt,
  5187. force,
  5188. errp);
  5189. }
  5190. return ret;
  5191. }
  5192. /*
  5193. * If offset or size are negative, respectively, they will not be included in
  5194. * the BLOCK_IMAGE_CORRUPTED event emitted.
  5195. * fatal will be ignored for read-only BDS; corruptions found there will always
  5196. * be considered non-fatal.
  5197. */
  5198. void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
  5199. int64_t size, const char *message_format, ...)
  5200. {
  5201. BDRVQcow2State *s = bs->opaque;
  5202. const char *node_name;
  5203. char *message;
  5204. va_list ap;
  5205. fatal = fatal && bdrv_is_writable(bs);
  5206. if (s->signaled_corruption &&
  5207. (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
  5208. {
  5209. return;
  5210. }
  5211. va_start(ap, message_format);
  5212. message = g_strdup_vprintf(message_format, ap);
  5213. va_end(ap);
  5214. if (fatal) {
  5215. fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
  5216. "corruption events will be suppressed\n", message);
  5217. } else {
  5218. fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
  5219. "corruption events will be suppressed\n", message);
  5220. }
  5221. node_name = bdrv_get_node_name(bs);
  5222. qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
  5223. *node_name ? node_name : NULL,
  5224. message, offset >= 0, offset,
  5225. size >= 0, size,
  5226. fatal);
  5227. g_free(message);
  5228. if (fatal) {
  5229. qcow2_mark_corrupt(bs);
  5230. bs->drv = NULL; /* make BDS unusable */
  5231. }
  5232. s->signaled_corruption = true;
  5233. }
/*
 * Option descriptors shared by the qcow2 create and amend option lists:
 * virtual size, compatibility level, backing file and format, external
 * data file and its raw flag, lazy refcounts and refcount width.
 *
 * The expansion is a comma-separated sequence of initializers without a
 * trailing comma, so it is used as "QCOW_COMMON_OPTIONS," inside a
 * descriptor array.
 */
#define QCOW_COMMON_OPTIONS                                         \
    {                                                               \
        .name = BLOCK_OPT_SIZE,                                     \
        .type = QEMU_OPT_SIZE,                                      \
        .help = "Virtual disk size"                                 \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_COMPAT_LEVEL,                             \
        .type = QEMU_OPT_STRING,                                    \
        .help = "Compatibility level (v2 [0.10] or v3 [1.1])"       \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_BACKING_FILE,                             \
        .type = QEMU_OPT_STRING,                                    \
        .help = "File name of a base image"                         \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_BACKING_FMT,                              \
        .type = QEMU_OPT_STRING,                                    \
        .help = "Image format of the base image"                    \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_DATA_FILE,                                \
        .type = QEMU_OPT_STRING,                                    \
        .help = "File name of an external data file"                \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_DATA_FILE_RAW,                            \
        .type = QEMU_OPT_BOOL,                                      \
        .help = "The external data file must stay valid "           \
                "as a raw image"                                    \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_LAZY_REFCOUNTS,                           \
        .type = QEMU_OPT_BOOL,                                      \
        .help = "Postpone refcount updates",                        \
        .def_value_str = "off"                                      \
    },                                                              \
    {                                                               \
        .name = BLOCK_OPT_REFCOUNT_BITS,                            \
        .type = QEMU_OPT_NUMBER,                                    \
        .help = "Width of a reference count entry in bits",         \
        .def_value_str = "16"                                       \
    }
  5278. static QemuOptsList qcow2_create_opts = {
  5279. .name = "qcow2-create-opts",
  5280. .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
  5281. .desc = {
  5282. { \
  5283. .name = BLOCK_OPT_ENCRYPT, \
  5284. .type = QEMU_OPT_BOOL, \
  5285. .help = "Encrypt the image with format 'aes'. (Deprecated " \
  5286. "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", \
  5287. }, \
  5288. { \
  5289. .name = BLOCK_OPT_ENCRYPT_FORMAT, \
  5290. .type = QEMU_OPT_STRING, \
  5291. .help = "Encrypt the image, format choices: 'aes', 'luks'", \
  5292. }, \
  5293. BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", \
  5294. "ID of secret providing qcow AES key or LUKS passphrase"), \
  5295. BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), \
  5296. BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), \
  5297. BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), \
  5298. BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), \
  5299. BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), \
  5300. BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), \
  5301. { \
  5302. .name = BLOCK_OPT_CLUSTER_SIZE, \
  5303. .type = QEMU_OPT_SIZE, \
  5304. .help = "qcow2 cluster size", \
  5305. .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) \
  5306. }, \
  5307. { \
  5308. .name = BLOCK_OPT_EXTL2, \
  5309. .type = QEMU_OPT_BOOL, \
  5310. .help = "Extended L2 tables", \
  5311. .def_value_str = "off" \
  5312. }, \
  5313. { \
  5314. .name = BLOCK_OPT_PREALLOC, \
  5315. .type = QEMU_OPT_STRING, \
  5316. .help = "Preallocation mode (allowed values: off, " \
  5317. "metadata, falloc, full)" \
  5318. }, \
  5319. { \
  5320. .name = BLOCK_OPT_COMPRESSION_TYPE, \
  5321. .type = QEMU_OPT_STRING, \
  5322. .help = "Compression method used for image cluster " \
  5323. "compression", \
  5324. .def_value_str = "zlib" \
  5325. },
  5326. QCOW_COMMON_OPTIONS,
  5327. { /* end of list */ }
  5328. }
  5329. };
  5330. static QemuOptsList qcow2_amend_opts = {
  5331. .name = "qcow2-amend-opts",
  5332. .head = QTAILQ_HEAD_INITIALIZER(qcow2_amend_opts.head),
  5333. .desc = {
  5334. BLOCK_CRYPTO_OPT_DEF_LUKS_STATE("encrypt."),
  5335. BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT("encrypt."),
  5336. BLOCK_CRYPTO_OPT_DEF_LUKS_OLD_SECRET("encrypt."),
  5337. BLOCK_CRYPTO_OPT_DEF_LUKS_NEW_SECRET("encrypt."),
  5338. BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
  5339. QCOW_COMMON_OPTIONS,
  5340. { /* end of list */ }
  5341. }
  5342. };
  5343. static const char *const qcow2_strong_runtime_opts[] = {
  5344. "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
  5345. NULL
  5346. };
  5347. BlockDriver bdrv_qcow2 = {
  5348. .format_name = "qcow2",
  5349. .instance_size = sizeof(BDRVQcow2State),
  5350. .bdrv_probe = qcow2_probe,
  5351. .bdrv_open = qcow2_open,
  5352. .bdrv_close = qcow2_close,
  5353. .bdrv_reopen_prepare = qcow2_reopen_prepare,
  5354. .bdrv_reopen_commit = qcow2_reopen_commit,
  5355. .bdrv_reopen_commit_post = qcow2_reopen_commit_post,
  5356. .bdrv_reopen_abort = qcow2_reopen_abort,
  5357. .bdrv_join_options = qcow2_join_options,
  5358. .bdrv_child_perm = bdrv_default_perms,
  5359. .bdrv_co_create_opts = qcow2_co_create_opts,
  5360. .bdrv_co_create = qcow2_co_create,
  5361. .bdrv_has_zero_init = qcow2_has_zero_init,
  5362. .bdrv_co_block_status = qcow2_co_block_status,
  5363. .bdrv_co_preadv_part = qcow2_co_preadv_part,
  5364. .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
  5365. .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
  5366. .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
  5367. .bdrv_co_pdiscard = qcow2_co_pdiscard,
  5368. .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
  5369. .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
  5370. .bdrv_co_truncate = qcow2_co_truncate,
  5371. .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
  5372. .bdrv_make_empty = qcow2_make_empty,
  5373. .bdrv_snapshot_create = qcow2_snapshot_create,
  5374. .bdrv_snapshot_goto = qcow2_snapshot_goto,
  5375. .bdrv_snapshot_delete = qcow2_snapshot_delete,
  5376. .bdrv_snapshot_list = qcow2_snapshot_list,
  5377. .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
  5378. .bdrv_measure = qcow2_measure,
  5379. .bdrv_co_get_info = qcow2_co_get_info,
  5380. .bdrv_get_specific_info = qcow2_get_specific_info,
  5381. .bdrv_co_save_vmstate = qcow2_co_save_vmstate,
  5382. .bdrv_co_load_vmstate = qcow2_co_load_vmstate,
  5383. .is_format = true,
  5384. .supports_backing = true,
  5385. .bdrv_co_change_backing_file = qcow2_co_change_backing_file,
  5386. .bdrv_refresh_limits = qcow2_refresh_limits,
  5387. .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache,
  5388. .bdrv_inactivate = qcow2_inactivate,
  5389. .create_opts = &qcow2_create_opts,
  5390. .amend_opts = &qcow2_amend_opts,
  5391. .strong_runtime_opts = qcow2_strong_runtime_opts,
  5392. .mutable_opts = mutable_opts,
  5393. .bdrv_co_check = qcow2_co_check,
  5394. .bdrv_amend_options = qcow2_amend_options,
  5395. .bdrv_co_amend = qcow2_co_amend,
  5396. .bdrv_detach_aio_context = qcow2_detach_aio_context,
  5397. .bdrv_attach_aio_context = qcow2_attach_aio_context,
  5398. .bdrv_supports_persistent_dirty_bitmap =
  5399. qcow2_supports_persistent_dirty_bitmap,
  5400. .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
  5401. .bdrv_co_remove_persistent_dirty_bitmap =
  5402. qcow2_co_remove_persistent_dirty_bitmap,
  5403. };
  5404. static void bdrv_qcow2_init(void)
  5405. {
  5406. bdrv_register(&bdrv_qcow2);
  5407. }
  5408. block_init(bdrv_qcow2_init);