migration.c
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "ui/qemu-spice.h"

#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
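/*
 * E.g. with BUFFER_DELAY = 100 there are XFER_LIMIT_RATIO = 1000 / 100 = 10
 * throttling windows per second, so a rate limit of B bytes/s allows at most
 * B / XFER_LIMIT_RATIO bytes to be queued in each 100 ms window.
 */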
/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
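/* i.e. 200 * 100 = 20000 ms, a 20 second default checkpoint interval */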
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};
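/*
 * On the wire (see migrate_send_rp_message() below) each RP message is
 * framed as: be16 message type, be16 payload length, then 'len' bytes
 * of payload in the per-message formats noted above.
 */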
/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                       /* Capability set size */
    MigrationCapability caps[];     /* Variadic array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {                      \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ } \
    }
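/*
 * The compound literal (int []) { __VA_ARGS__ } exists only so that
 * sizeof() can count the variadic arguments at compile time; e.g. with
 * two hypothetical capabilities CAP_A and CAP_B,
 * INITIALIZE_MIGRATE_CAPS_SET(set, CAP_A, CAP_B) yields .size == 2.
 */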
/* Background-snapshot compatibility check list */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID,
    MIGRATION_CAPABILITY_ZERO_COPY_SEND);

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static bool migration_needs_multiple_sockets(void)
{
    return migrate_use_multifd() || migrate_postcopy_preempt();
}

static bool uri_supports_multi_channels(const char *uri)
{
    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
           strstart(uri, "vsock:", NULL);
}

static bool
migration_channels_and_uri_compatible(const char *uri, Error **errp)
{
    if (migration_needs_multiple_sockets() &&
        !uri_supports_multi_channels(uri)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    return true;
}

static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}
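/*
 * (a > b) - (a < b) is the usual overflow-safe three-way compare: it
 * returns -1, 0 or 1, which satisfies the GCompareFunc contract that
 * the page_requested GTree expects.
 */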
void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be waiting
     * on a semaphore, so we should wake up the COLO thread before
     * migration shutdown.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_load_cleanup();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that qemu file got error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   Start: Address offset within the RB
 *   Len: Length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We maintain the last ramblock that we requested for page.  Note that we
     * don't need locking because this function will only be called within the
     * postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need the
     * lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}
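/*
 * Accepted URI forms, matching the dispatch below: "tcp:host:port",
 * "unix:/path/to/socket", "vsock:cid:port", "rdma:host:port" (when
 * CONFIG_RDMA is set), "exec:command", and "fd:N" for an already-open
 * file descriptor.
 */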
static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    multifd_load_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait for the checkpoint incoming thread to exit before freeing
         * resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);

    multifd_load_cleanup();

    exit(EXIT_FAILURE);
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}
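/*
 * The main channel is left non-blocking above because qemu_loadvm_state()
 * runs in process_incoming_migration_co(), a coroutine driven by the main
 * loop; contrast postcopy_try_recover() below, which switches the channel
 * back to blocking for the dedicated postcopy load thread.
 */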
void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * rest of the threads will still be waiting), so that we can
         * receive commands from the source now, and answer them if
         * needed. The rest of the threads will be woken up afterwards,
         * once we are sure that the source is ready to reply to page
         * requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

/*
 * Returns true when we want to start a new incoming migration process,
 * false otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_use_multifd()) {
        return migration_has_all_channels();
    }

    /* Preempt channel only starts when the main channel is created */
    if (migrate_postcopy_preempt()) {
        return main_channel;
    }

    /*
     * For all the rest types of migration, we should only reach here when
     * it's the main channel that's being created, and we should always
     * proceed with this channel.
     */
    assert(main_channel);
    return true;
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    bool default_channel = true;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /*
         * With multiple channels, it is possible that we receive channels
         * out of order on destination side, causing incorrect mapping of
         * source channels on destination side. Check channel MAGIC to
         * decide type of channel. Please note this is best effort, postcopy
         * preempt channel does not send any magic number so avoid it for
         * postcopy live migration. Also tls live migration already does
         * tls handshake while initializing main channel so with tls this
         * issue is not possible.
         */
        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                          sizeof(channel_magic), &local_err);
        if (ret != 0) {
            error_propagate(errp, local_err);
            return;
        }

        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

    if (multifd_load_setup(errp) != 0) {
        error_setg(errp, "Failed to setup multifd channels");
        return;
    }

    if (default_channel) {
        f = qemu_file_new_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_use_multifd()) {
            multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (migration_should_start_incoming(default_channel)) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_use_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (fault thread is still paused),
     * and it's ok even not taking the mutex. However the best way is
     * to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL, **tail = &head;
    MigrationCapabilityStatus *caps;
    MigrationState *s = migrate_get_current();
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        caps = g_malloc0(sizeof(*caps));
        caps->capability = i;
        caps->state = s->enabled_capabilities[i];
        QAPI_LIST_APPEND(tail, caps);
    }

    return head;
}

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}
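/*
 * For reference, the handler above backs the QMP command
 * "query-migrate-parameters"; a typical exchange looks like:
 *
 *   -> { "execute": "query-migrate-parameters" }
 *   <- { "return": { "compress-level": 1, "downtime-limit": 300, ... } }
 *
 * (The field values shown are illustrative defaults, not fixed output.)
 */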
void qmp_client_migrate_info(const char *protocol, const char *hostname,
                             bool has_port, int64_t port,
                             bool has_tls_port, int64_t tls_port,
                             const char *cert_subject,
                             Error **errp)
{
    if (strcmp(protocol, "spice") == 0) {
        if (!qemu_using_spice(errp)) {
            return;
        }

        if (!has_port && !has_tls_port) {
            error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
            return;
        }

        if (qemu_spice.migrate_info(hostname,
                                    has_port ? port : -1,
                                    has_tls_port ? tls_port : -1,
                                    cert_subject)) {
            error_setg(errp, "Could not set up display for migration");
            return;
        }
        return;
    }

    error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
}

AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}
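/*
 * Note that 'ap' has static storage, so the returned pointer remains
 * valid after the call; its fields are simply refreshed from the
 * current migration parameters on every invocation.
 */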
/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
    case MIGRATION_STATUS_COLO:
        return true;

    default:
        return false;
    }
}
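/*
 * Same shape as migration_is_setup_or_active() above, but answering a
 * slightly different question: this variant also counts CANCELLING as
 * "running" while leaving out COLO.
 */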
  939. bool migration_is_running(int state)
  940. {
  941. switch (state) {
  942. case MIGRATION_STATUS_ACTIVE:
  943. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  944. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  945. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  946. case MIGRATION_STATUS_SETUP:
  947. case MIGRATION_STATUS_PRE_SWITCHOVER:
  948. case MIGRATION_STATUS_DEVICE:
  949. case MIGRATION_STATUS_WAIT_UNPLUG:
  950. case MIGRATION_STATUS_CANCELLING:
  951. return true;
  952. default:
  953. return false;
  954. }
  955. }
  956. static bool migrate_show_downtime(MigrationState *s)
  957. {
  958. return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
  959. }
  960. static void populate_time_info(MigrationInfo *info, MigrationState *s)
  961. {
  962. info->has_status = true;
  963. info->has_setup_time = true;
  964. info->setup_time = s->setup_time;
  965. if (s->state == MIGRATION_STATUS_COMPLETED) {
  966. info->has_total_time = true;
  967. info->total_time = s->total_time;
  968. } else {
  969. info->has_total_time = true;
  970. info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
  971. s->start_time;
  972. }
  973. if (migrate_show_downtime(s)) {
  974. info->has_downtime = true;
  975. info->downtime = s->downtime;
  976. } else {
  977. info->has_expected_downtime = true;
  978. info->expected_downtime = s->expected_downtime;
  979. }
  980. }
static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = stat64_get(&ram_atomic_counters.transferred);
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate);
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = stat64_get(&ram_atomic_counters.normal);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->dirty_sync_missed_zero_copy =
        ram_counters.dirty_sync_missed_zero_copy;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = ram_counters.precopy_bytes;
    info->ram->downtime_bytes = ram_counters.downtime_bytes;
    info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);

    if (migrate_use_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

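/*
 * Fill in the source-side view for query-migrate: the list of reasons
 * migration would currently be blocked, plus whatever statistics are
 * meaningful for the current migration state.
 */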
static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked:
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);
    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;
}

typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
    WT_SUPPORT_AVAILABLE,
    WT_SUPPORT_COMPATIBLE
} WriteTrackingSupport;

static WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* Check if kernel supports required UFFD features */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }
    /*
     * Check if current memory configuration is
     * compatible with required UFFD features.
     */
    if (!ram_write_tracking_compatible()) {
        return WT_SUPPORT_AVAILABLE;
    }

    return WT_SUPPORT_COMPATIBLE;
}

/**
 * @migrate_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /*
         * This check is reasonably expensive, so only perform it when
         * the capability is being set for the first time; also, it's
         * only the destination that needs the special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /*
             * postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
        WriteTrackingSupport wt_support;
        int idx;
        /*
         * Check if 'background-snapshot' capability is supported by
         * host kernel and compatible with guest memory configuration.
         */
        wt_support = migrate_query_write_tracking();
        if (wt_support < WT_SUPPORT_AVAILABLE) {
            error_setg(errp, "Background-snapshot is not supported by host kernel");
            return false;
        }
        if (wt_support < WT_SUPPORT_COMPATIBLE) {
            error_setg(errp, "Background-snapshot is not compatible "
                       "with guest memory configuration");
            return false;
        }

        /*
         * Check if there are any migration capabilities
         * incompatible with 'background-snapshot'.
         */
        for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
            int incomp_cap = check_caps_background_snapshot.caps[idx];
            if (cap_list[incomp_cap]) {
                error_setg(errp,
                           "Background-snapshot is not compatible with %s",
                           MigrationCapability_str(incomp_cap));
                return false;
            }
        }
    }

#ifdef CONFIG_LINUX
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
        (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
         cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
         cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
         migrate_multifd_compression() ||
         migrate_use_tls())) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#else
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
        error_setg(errp,
                   "Zero copy currently only available on Linux");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
        if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
            error_setg(errp, "Postcopy preempt requires postcopy-ram");
            return false;
        }

        /*
         * Preempt mode requires urgent pages to be sent on a separate
         * channel; the compression logic, on the other hand, scatters
         * pages across different compression channels, which is not
         * compatible with the preempt assumptions on channel
         * assignment.
         */
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Postcopy preempt not compatible with compress");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Multifd is not compatible with compress");
            return false;
        }
    }

    return true;
}

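/*
 * Destination-side counterpart of fill_source_migration_info(): report
 * the listening socket address(es) and the incoming migration status.
 */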
static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

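/*
 * Capabilities can only be changed while no migration is running.  The
 * new set is first validated on a scratch copy via migrate_caps_check()
 * and only committed to the global state if that check passes.
 */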
void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}

/*
 * Check whether the parameters are valid.  Error will be put into errp
 * (if provided).  Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                   stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "a value between 0 and 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "a power of two no less than the target page size");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "a value between 0 and 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "a value between 1 and 10000");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

#ifdef CONFIG_LINUX
    if (migrate_use_zero_copy_send() &&
        ((params->has_multifd_compression && params->multifd_compression) ||
         (params->tls_creds && *params->tls_creds))) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#endif

    return true;
}

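/*
 * Build a scratch copy of the current parameters with @params applied
 * on top, so that migrate_params_check() can validate the combination
 * without touching the real state.  Note that the string members are
 * shared with @params rather than duplicated.
 */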
static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }
    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }
    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }
    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }
    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }
    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }
    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }
    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }
    if (params->tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }
    if (params->tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }
    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }
    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }
    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }
    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }
    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}

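/*
 * Commit @params to the global MigrationState.  Some parameters take
 * effect immediately: the bandwidth limits update the rate limit of an
 * in-flight QEMUFile, x-checkpoint-delay notifies COLO, and the xbzrle
 * cache is resized right away.
 */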
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }
    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }
    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }
    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }
    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold =
            params->throttle_trigger_threshold;
    }
    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }
    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }
    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }
    if (params->tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }
    if (params->tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }
    if (params->tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }
    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }
    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }
    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                        s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}

void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        qobject_unref(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        qobject_unref(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params, errp);
}

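/*
 * For illustration, a QMP client would drive the setter above with
 * something like the following (on the wire the parameter names use
 * '-' rather than '_'):
 *
 *   { "execute": "migrate-set-parameters",
 *     "arguments": { "max-bandwidth": 33554432 } }
 */
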
void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * We don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

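/*
 * Atomically move *state from @old_state to @new_state; the trace point
 * and the migration status event are only emitted when this call
 * actually performed the transition.
 */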
void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
                                                  bool state)
{
    MigrationCapabilityStatus *cap;

    cap = g_new0(MigrationCapabilityStatus, 1);
    cap->capability = index;
    cap->state = state;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap = NULL;

    QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}

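/*
 * Tear down the source side of a migration: join the migration thread
 * (with the iothread lock dropped so the thread can make progress),
 * detach to_dst_file under qemu_file_lock, and close it outside the
 * lock so the critical section stays short.
 */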
static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    g_free(s->hostname);
    s->hostname = NULL;
    json_writer_free(s->vmdesc);
    s->vmdesc = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    if (s->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
        qemu_fclose(s->postcopy_qemufile_src);
        s->postcopy_qemufile_src = NULL;
    }

    assert(!migration_is_active(s));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on 'info migrate'.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for the bh, because it may be called when
     * there are already no other refs.
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
    MigrationState *s = opaque;
    migrate_fd_cleanup(s);
    object_unref(OBJECT(s));
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (!s->error) {
        s->error = error_copy(error);
    }
}

static void migrate_error_free(MigrationState *s)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        error_free(s->error);
        s->error = NULL;
    }
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}

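/*
 * Drive a running migration towards CANCELLING: kick it out of a paused
 * pre-switchover state if needed, shut the socket down so stuck I/O
 * aborts, and reactivate block devices if they had already been
 * inactivated for the switchover.
 */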
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shut down the rp socket, causing the rp thread to exit */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    do {
        old_state = s->state;
        if (!migration_is_running(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        return true;
    default:
        return false;
    }
}

bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}

bool migration_in_incoming_postcopy(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
}

bool migration_incoming_postcopy_advised(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

bool migration_in_bg_snapshot(void)
{
    MigrationState *s = migrate_get_current();

    return migrate_background_snapshot() &&
           migration_is_setup_or_active(s->state);
}

bool migration_is_idle(void)
{
    MigrationState *s = current_migration;

    if (!s) {
        return true;
    }

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
        return false;
    case MIGRATION_STATUS__MAX:
        g_assert_not_reached();
    }

    return false;
}

bool migration_is_active(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

void migrate_init(MigrationState *s)
{
    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->cleanup_bh = 0;
    s->vm_start_bh = 0;
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->pages_per_second = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;
    s->hostname = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->total_time = 0;
    s->vm_was_running = false;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
}

int migrate_add_blocker_internal(Error *reason, Error **errp)
{
    /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
    if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
        error_propagate_prepend(errp, error_copy(reason),
                                "disallowing migration blocker "
                                "(migration/snapshot in progress) for: ");
        return -EBUSY;
    }

    migration_blockers = g_slist_prepend(migration_blockers, reason);
    return 0;
}

int migrate_add_blocker(Error *reason, Error **errp)
{
    if (only_migratable) {
        error_propagate_prepend(errp, error_copy(reason),
                                "disallowing migration blocker "
                                "(--only-migratable) for: ");
        return -EACCES;
    }

    return migrate_add_blocker_internal(reason, errp);
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}

void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }
    if (!runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "'-incoming' was not specified on the command line");
        return;
    }

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

void qmp_migrate_recover(const char *uri, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    /*
     * Don't even bother to use ERRP_GUARD() as errp _must_ always be
     * set by the callers (no one should ignore a recover failure); if
     * one does, it's a programming error.
     */
    assert(errp);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
        error_setg(errp, "Migrate recover can only be run "
                   "when postcopy is paused.");
        return;
    }

    /* If there's an existing transport, release it */
    migration_incoming_transport_cleanup(mis);

    /*
     * Note that this call will never start a real migration; it will
     * only re-setup the migration stream and poke existing migration
     * to continue using that newly established channel.
     */
    qemu_start_incoming_migration(uri, errp);
}

void qmp_migrate_pause(Error **errp)
{
    MigrationState *ms = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /* Source side, during postcopy */
        qemu_mutex_lock(&ms->qemu_file_lock);
        ret = qemu_file_shutdown(ms->to_dst_file);
        qemu_mutex_unlock(&ms->qemu_file_lock);
        if (ret) {
            error_setg(errp, "Failed to pause source migration");
        }
        return;
    }

    if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        ret = qemu_file_shutdown(mis->from_src_file);
        if (ret) {
            error_setg(errp, "Failed to pause destination migration");
        }
        return;
    }

    error_setg(errp, "migrate-pause is currently only supported "
               "during postcopy-active state");
}

bool migration_is_blocked(Error **errp)
{
    if (qemu_savevm_state_blocked(errp)) {
        return true;
    }

    if (migration_blockers) {
        error_propagate(errp, error_copy(migration_blockers->data));
        return true;
    }

    return false;
}

/* Returns true if continue to migrate, or false if error detected */
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
                            bool resume, Error **errp)
{
    Error *local_err = NULL;

    if (resume) {
        if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
            error_setg(errp, "Cannot resume if there is no "
                       "paused migration");
            return false;
        }

        /*
         * Postcopy recovery won't work well with release-ram
         * capability since release-ram will drop the page buffer as
         * soon as the page is put into the send buffer.  So if a
         * network failure happens, any page buffers that have not yet
         * reached the destination VM but have already been sent from
         * the source VM will be lost forever.  Let's refuse to let the
         * client resume such a postcopy migration.  Luckily release-ram
         * was designed to only be used when src and destination VMs
         * are on the same host, so it should be fine.
         */
        if (migrate_release_ram()) {
            error_setg(errp, "Postcopy recovery cannot work "
                       "when release-ram capability is set");
            return false;
        }

        /* This is a resume, skip init status */
        return true;
    }

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return false;
    }

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return false;
    }

    if (runstate_check(RUN_STATE_POSTMIGRATE)) {
        error_setg(errp, "Can't migrate the vm that was paused due to "
                   "previous migration");
        return false;
    }

    if (migration_is_blocked(errp)) {
        return false;
    }

    if (blk || blk_inc) {
        if (migrate_colo_enabled()) {
            error_setg(errp, "No disk migration is required in COLO mode");
            return false;
        }
        if (migrate_use_block() || migrate_use_block_incremental()) {
            error_setg(errp, "Command options are incompatible with "
                       "current migration capabilities");
            return false;
        }
        migrate_set_block_enabled(true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return false;
        }
        s->must_remove_block_options = true;
    }

    if (blk_inc) {
        migrate_set_block_incremental(s, true);
    }

    migrate_init(s);
    /*
     * Zero the ram_counters and compression_counters memory for the
     * new migration.
     */
    memset(&ram_counters, 0, sizeof(ram_counters));
    memset(&compression_counters, 0, sizeof(compression_counters));

    return true;
}

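/*
 * Entry point of the 'migrate' command: validate and prepare the
 * request, then dispatch on the URI scheme to the matching transport
 * backend.
 */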
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 bool has_resume, bool resume, Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
                         has_resume && resume, errp)) {
        /* Error detected, put into errp */
        return;
    }

    if (!(has_resume && resume)) {
        if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
            return;
        }
    }

    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_outgoing_migration(s, p ? p : uri, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
    } else {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        block_cleanup_parameters(s);
        return;
    }

    if (local_err) {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        migrate_fd_error(s, local_err);
        error_propagate(errp, local_err);
        return;
    }
}

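/*
 * For illustration, a typical invocation from a QMP client looks like:
 *
 *   { "execute": "migrate",
 *     "arguments": { "uri": "tcp:dst-host.example.com:4444" } }
 */
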
void qmp_migrate_cancel(Error **errp)
{
    migration_cancel(NULL);
}

void qmp_migrate_continue(MigrationStatus state, Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (s->state != state) {
        error_setg(errp, "Migration not in expected state: %s",
                   MigrationStatus_str(s->state));
        return;
    }
    qemu_sem_post(&s->pause_sem);
}

bool migrate_release_ram(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
}

bool migrate_postcopy_ram(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}

bool migrate_postcopy(void)
{
    return migrate_postcopy_ram() || migrate_dirty_bitmaps();
}

bool migrate_auto_converge(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

bool migrate_zero_blocks(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

bool migrate_postcopy_blocktime(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
}

bool migrate_use_compression(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
}

int migrate_compress_level(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.compress_level;
}

int migrate_compress_threads(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.compress_threads;
}

int migrate_compress_wait_thread(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.compress_wait_thread;
}

int migrate_decompress_threads(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.decompress_threads;
}

bool migrate_dirty_bitmaps(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
}

bool migrate_ignore_shared(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
}

bool migrate_validate_uuid(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
}

bool migrate_use_events(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

bool migrate_use_multifd(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
}

bool migrate_pause_before_switchover(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
}

int migrate_multifd_channels(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.multifd_channels;
}

MultiFDCompression migrate_multifd_compression(void)
{
    MigrationState *s;
    s = migrate_get_current();
    assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
    return s->parameters.multifd_compression;
}

int migrate_multifd_zlib_level(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.multifd_zlib_level;
}

int migrate_multifd_zstd_level(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.multifd_zstd_level;
}

#ifdef CONFIG_LINUX
bool migrate_use_zero_copy_send(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
}
#endif

int migrate_use_tls(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.tls_creds && *s->parameters.tls_creds;
}

int migrate_use_xbzrle(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

uint64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.xbzrle_cache_size;
}

static int64_t migrate_max_postcopy_bandwidth(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.max_postcopy_bandwidth;
}

bool migrate_use_block(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
}

bool migrate_use_return_path(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
}

bool migrate_use_block_incremental(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->parameters.block_incremental;
}

bool migrate_background_snapshot(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
}

bool migrate_postcopy_preempt(void)
{
    MigrationState *s;
    s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
}

/* migration thread support */

/*
 * Something bad happened to the RP stream; mark an error.  The caller
 * shall print or trace something to indicate why.
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}

static struct rp_cmd_args {
    ssize_t len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]      = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]         = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]         = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]    = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]  = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]   = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_MAX]          = { .len = -1, .name = "MAX" },
};

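/*
 * On the wire, each return-path message is a big-endian u16 message
 * type followed by a big-endian u16 payload length and the payload
 * itself; source_return_path_thread() below validates the length
 * against the table above (len == -1 means variable length).
 */
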
/*
 * Process a request for pages received on the return path.  We're
 * allowed to send more than requested (e.g. to round to our page size)
 * and we don't need to send pages that have already been sent.
 */
static void migrate_handle_rp_req_pages(MigrationState *ms, const char *rbname,
                                        ram_addr_t start, size_t len)
{
    long our_host_ps = qemu_real_host_page_size();

    trace_migrate_handle_rp_req_pages(rbname, start, len);

    /*
     * Since we currently insist on matching page sizes, just sanity check
     * we're being asked for whole host pages.
     */
    if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
        !QEMU_IS_ALIGNED(len, our_host_ps)) {
        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
                     " len: %zd", __func__, start, len);
        mark_source_rp_bad(ms);
        return;
    }

    if (ram_save_queue_pages(rbname, start, len)) {
        mark_source_rp_bad(ms);
    }
}

/* Return true to retry, false to quit */
static bool postcopy_pause_return_path_thread(MigrationState *s)
{
    trace_postcopy_pause_return_path();

    qemu_sem_wait(&s->postcopy_pause_rp_sem);

    trace_postcopy_pause_return_path_continued();

    return true;
}

static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);

    if (!block) {
        error_report("%s: invalid block name '%s'", __func__, block_name);
        return -EINVAL;
    }

    /* Fetch the received bitmap and refresh the dirty bitmap */
    return ram_dirty_bitmap_reload(s, block);
}

static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
{
    trace_source_return_path_thread_resume_ack(value);

    if (value != MIGRATION_RESUME_ACK_VALUE) {
        error_report("%s: illegal resume_ack value %"PRIu32,
                     __func__, value);
        return -1;
    }

    /* Now both sides are active. */
    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    /* Notify the send thread that it's time to continue sending pages */
    qemu_sem_post(&s->rp_state.rp_sem);

    return 0;
}

/*
 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if it
 * exists) in a safe way.
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as
         * we can't block within the lock section.
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same for the postcopy fast path socket, if there is one.
     * No locking is needed because this qemufile should only be managed
     * by the return path thread.
     */
    if (ms->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
        qemu_file_shutdown(ms->postcopy_qemufile_src);
        qemu_fclose(ms->postcopy_qemufile_src);
        ms->postcopy_qemufile_src = NULL;
    }

    qemu_fclose(file);
}

/*
 * Handles messages sent on the return path towards the source VM
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    rcu_register_thread();

retry:
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            mark_source_rp_bad(ms);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with "
                         "incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type, header_len,
                         (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

  2550. /* We know we've got a valid header by this point */
  2551. res = qemu_get_buffer(rp, buf, header_len);
  2552. if (res != header_len) {
  2553. error_report("RP: Failed reading data for message 0x%04x"
  2554. " read %d expected %d",
  2555. header_type, res, header_len);
  2556. mark_source_rp_bad(ms);
  2557. goto out;
  2558. }
  2559. /* OK, we have the message and the data */
  2560. switch (header_type) {
  2561. case MIG_RP_MSG_SHUT:
  2562. sibling_error = ldl_be_p(buf);
  2563. trace_source_return_path_thread_shut(sibling_error);
  2564. if (sibling_error) {
  2565. error_report("RP: Sibling indicated error %d", sibling_error);
  2566. mark_source_rp_bad(ms);
  2567. }
  2568. /*
  2569. * We'll let the main thread deal with closing the RP
  2570. * we could do a shutdown(2) on it, but we're the only user
  2571. * anyway, so there's nothing gained.
  2572. */
  2573. goto out;
  2574. case MIG_RP_MSG_PONG:
  2575. tmp32 = ldl_be_p(buf);
  2576. trace_source_return_path_thread_pong(tmp32);
  2577. qemu_sem_post(&ms->rp_state.rp_pong_acks);
  2578. break;
  2579. case MIG_RP_MSG_REQ_PAGES:
  2580. start = ldq_be_p(buf);
  2581. len = ldl_be_p(buf + 8);
  2582. migrate_handle_rp_req_pages(ms, NULL, start, len);
  2583. break;
  2584. case MIG_RP_MSG_REQ_PAGES_ID:
  2585. expected_len = 12 + 1; /* header + termination */
  2586. if (header_len >= expected_len) {
  2587. start = ldq_be_p(buf);
  2588. len = ldl_be_p(buf + 8);
  2589. /* Now we expect an idstr */
  2590. tmp32 = buf[12]; /* Length of the following idstr */
  2591. buf[13 + tmp32] = '\0';
  2592. expected_len += tmp32;
  2593. }
  2594. if (header_len != expected_len) {
  2595. error_report("RP: Req_Page_id with length %d expecting %zd",
  2596. header_len, expected_len);
  2597. mark_source_rp_bad(ms);
  2598. goto out;
  2599. }
  2600. migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
  2601. break;
  2602. case MIG_RP_MSG_RECV_BITMAP:
  2603. if (header_len < 1) {
  2604. error_report("%s: missing block name", __func__);
  2605. mark_source_rp_bad(ms);
  2606. goto out;
  2607. }
  2608. /* Format: len (1B) + idstr (<255B). This ends the idstr. */
  2609. buf[buf[0] + 1] = '\0';
  2610. if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
  2611. mark_source_rp_bad(ms);
  2612. goto out;
  2613. }
  2614. break;
  2615. case MIG_RP_MSG_RESUME_ACK:
  2616. tmp32 = ldl_be_p(buf);
  2617. if (migrate_handle_rp_resume_ack(ms, tmp32)) {
  2618. mark_source_rp_bad(ms);
  2619. goto out;
  2620. }
  2621. break;
  2622. default:
  2623. break;
  2624. }
  2625. }
  2626. out:
  2627. res = qemu_file_get_error(rp);
  2628. if (res) {
  2629. if (res && migration_in_postcopy()) {
  2630. /*
  2631. * Maybe there is something we can do: it looks like a
  2632. * network down issue, and we pause for a recovery.
  2633. */
  2634. migration_release_dst_files(ms);
  2635. rp = NULL;
  2636. if (postcopy_pause_return_path_thread(ms)) {
  2637. /*
  2638. * Reload rp, reset the rest. Referencing it is safe since
  2639. * it's reset only by us above, or when migration completes
  2640. */
  2641. rp = ms->rp_state.from_dst_file;
  2642. ms->rp_state.error = false;
  2643. goto retry;
  2644. }
  2645. }
  2646. trace_source_return_path_thread_bad_end();
  2647. mark_source_rp_bad(ms);
  2648. }
  2649. trace_source_return_path_thread_end();
  2650. migration_release_dst_files(ms);
  2651. rcu_unregister_thread();
  2652. return NULL;
  2653. }
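
/*
 * Open the return path from the destination and, unless we're resuming
 * a paused postcopy (create_thread == false), spawn the thread that
 * services it. Returns 0 on success, -1 if the channel can't be opened.
 */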
static int open_return_path_on_source(MigrationState *ms,
                                      bool create_thread)
{
    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
    if (!ms->rp_state.from_dst_file) {
        return -1;
    }

    trace_open_return_path_on_source();

    if (!create_thread) {
        /* We're done */
        return 0;
    }

    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
    ms->rp_state.rp_thread_created = true;

    trace_open_return_path_on_source_continue();

    return 0;
}

/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
static int await_return_path_close_on_source(MigrationState *ms)
{
    /*
     * If this is a normal exit then the destination will send a SHUT and the
     * rp_thread will exit, however if there's an error we need to cause
     * it to exit.
     */
    if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
        /*
         * shutdown(2), if we have it, will cause it to unblock if it's stuck
         * waiting for the destination.
         */
        qemu_file_shutdown(ms->rp_state.from_dst_file);
        mark_source_rp_bad(ms);
    }
    trace_await_return_path_close_on_source_joining();
    qemu_thread_join(&ms->rp_state.rp_thread);
    ms->rp_state.rp_thread_created = false;
    trace_await_return_path_close_on_source_close();
    return ms->rp_state.error;
}

static inline void
migration_wait_main_channel(MigrationState *ms)
{
    /* Wait until one PONG message received */
    qemu_sem_wait(&ms->rp_state.rp_pong_acks);
}

/*
 * Switch from normal iteration to postcopy
 * Returns non-0 on error
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;

    if (migrate_postcopy_preempt()) {
        migration_wait_main_channel(ms);
        if (postcopy_preempt_establish_channel(ms)) {
            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
            return -1;
        }
    }

    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * In the 'finish migrate' state, with the io-lock held, everything
     * should be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw away any pages it's already
     * received that are dirty
     */
    if (migrate_postcopy_ram()) {
        ram_postcopy_send_discard_bitmap(ms);
    }

    /*
     * Send the rest of the state - note that things doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it. Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each device's load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /*
     * Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again that we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /*
     * Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    }

    trace_postcopy_preempt_enabled(migrate_postcopy_preempt());

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /*
         * A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}

/**
 * migration_maybe_pause: Pause if required to by
 * migrate_pause_before_switchover; called with the iothread locked.
 * Returns: 0 on success
 */
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state)
{
    if (!migrate_pause_before_switchover()) {
        return 0;
    }

    /*
     * Since leaving this state is not atomic with posting the semaphore
     * it's possible that someone could have issued multiple migrate_continue
     * and the semaphore is incorrectly positive at this point;
     * the docs say it's undefined to reinit a semaphore that's already
     * init'd, so use timedwait to eat up any existing posts.
     */
    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
        /* This block intentionally left blank */
    }

    /*
     * If the migration is cancelled when it is in the completion phase,
     * the migration state is set to MIGRATION_STATUS_CANCELLING.
     * So we don't need to wait on the semaphore; otherwise we would
     * always be waiting on the 'pause_sem' semaphore.
     */
    if (s->state != MIGRATION_STATUS_CANCELLING) {
        qemu_mutex_unlock_iothread();
        migrate_set_state(&s->state, *current_active_state,
                          MIGRATION_STATUS_PRE_SWITCHOVER);
        qemu_sem_wait(&s->pause_sem);
        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                          new_state);
        *current_active_state = new_state;
        qemu_mutex_lock_iothread();
    }

    return s->state == new_state ? 0 : -EINVAL;
}

/**
 * migration_completion: Used by migration_thread when there's not much left.
 * The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);

        s->vm_was_running = runstate_is_running();
        ret = global_state_store();
        if (!ret) {
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            trace_migration_completion_vm_stop(ret);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                /*
                 * Inactivate disks except in COLO, and track that we
                 * have done so in order to remember to reactivate
                 * them if migration fails or is cancelled.
                 */
                s->block_inactive = !migrate_colo_enabled();
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         s->block_inactive);
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_mutex_lock_iothread();
        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        qemu_mutex_unlock_iothread();

        /*
         * Shutdown the postcopy fast path thread. This is only needed
         * when dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need
         * this.
         */
        if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
            postcopy_preempt_shutdown_file(s);
        }

        trace_migration_completion_postcopy_end_after_complete();
    } else {
        goto fail;
    }

    /*
     * If the rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.rp_thread_created) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail;
    }

    if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
        /* COLO does not support postcopy */
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_COLO);
    } else {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail:
    if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
                              s->state == MIGRATION_STATUS_DEVICE)) {
        /*
         * If not doing postcopy, vm_start() will be called: let's
         * regain control on images.
         */
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}

/**
 * bg_migration_completion: Used by bg_migration_thread after all the
 * RAM has been saved. The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 */
static void bg_migration_completion(MigrationState *s)
{
    int current_active_state = s->state;

    /*
     * Stop tracking RAM writes - un-protect memory, un-register UFFD
     * memory ranges, flush kernel wait queues and wake up threads
     * waiting for write fault to be resolved.
     */
    ram_write_tracking_stop();

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        /*
         * By this moment we have RAM content saved into the migration stream.
         * The next step is to flush the non-RAM content (device state)
         * right after the ram content. The device state has been stored into
         * the temporary buffer before RAM saving started.
         */
        qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
        qemu_fflush(s->to_dst_file);
    } else if (s->state == MIGRATION_STATUS_CANCELLING) {
        goto fail;
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail;
    }

    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_COMPLETED);
    return;

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}
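
/* Query whether the x-colo migration capability has been enabled */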
bool migrate_colo_enabled(void)
{
    MigrationState *s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
}

typedef enum MigThrError {
    /* No error detected */
    MIG_THR_ERR_NONE = 0,
    /* Detected error, but resumed successfully */
    MIG_THR_ERR_RECOVERED = 1,
    /* Detected fatal error, need to exit */
    MIG_THR_ERR_FATAL = 2,
} MigThrError;
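
/*
 * Send the postcopy-resume request and wait until the destination
 * acknowledges it (via a RESUME_ACK on the return path, which flips
 * the state back to POSTCOPY_ACTIVE). Returns 0 on success, -1 if the
 * state moved elsewhere.
 */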
static int postcopy_resume_handshake(MigrationState *s)
{
    qemu_savevm_send_postcopy_resume(s->to_dst_file);

    while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
        qemu_sem_wait(&s->rp_state.rp_sem);
    }

    if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        return 0;
    }

    return -1;
}

/* Return zero if success, or <0 for error */
static int postcopy_do_resume(MigrationState *s)
{
    int ret;

    /*
     * Call all the resume_prepare() hooks, so that modules can be
     * ready for the migration resume.
     */
    ret = qemu_savevm_state_resume_prepare(s);
    if (ret) {
        error_report("%s: resume_prepare() failure detected: %d",
                     __func__, ret);
        return ret;
    }

    /*
     * If preempt is enabled, re-establish the preempt channel. Note that
     * we do it after resume prepare to make sure the main channel will be
     * created before the preempt channel. E.g. with weak network, the
     * dest QEMU may get messed up with the preempt and main channels on
     * the order of connection setup. This guarantees the correct order.
     */
    ret = postcopy_preempt_establish_channel(s);
    if (ret) {
        error_report("%s: postcopy_preempt_establish_channel(): %d",
                     __func__, ret);
        return ret;
    }

    /*
     * Last handshake with destination on the resume (destination will
     * switch to postcopy-active afterwards)
     */
    ret = postcopy_resume_handshake(s);
    if (ret) {
        error_report("%s: handshake failed: %d", __func__, ret);
        return ret;
    }

    return 0;
}

/*
 * We don't return until we are in a safe state to continue the current
 * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or
 * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
 */
static MigThrError postcopy_pause(MigrationState *s)
{
    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

    while (true) {
        QEMUFile *file;

        /*
         * Current channel is possibly broken. Release it. Note that this is
         * guaranteed even without lock because to_dst_file should only be
         * modified by the migration thread. That also guarantees that the
         * unregister of yank is safe too without the lock. It should be safe
         * even to be within the qemu_file_lock, but we didn't do that to avoid
         * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make
         * the qemu_file_lock critical section as small as possible.
         */
        assert(s->to_dst_file);
        migration_ioc_unregister_yank_from_file(s->to_dst_file);
        qemu_mutex_lock(&s->qemu_file_lock);
        file = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);

        qemu_file_shutdown(file);
        qemu_fclose(file);

        migrate_set_state(&s->state, s->state,
                          MIGRATION_STATUS_POSTCOPY_PAUSED);

        error_report("Detected IO failure for postcopy. "
                     "Migration paused.");

        /*
         * We wait until things are fixed up. Then someone will set the
         * status back for us.
         */
        while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
            qemu_sem_wait(&s->postcopy_pause_sem);
        }

        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
            /* Woken up by a recover procedure. Give it a shot */

            /*
             * Firstly, let's wake up the return path now, with a new
             * return path channel.
             */
            qemu_sem_post(&s->postcopy_pause_rp_sem);

            /* Do the resume logic */
            if (postcopy_do_resume(s) == 0) {
                /* Let's continue! */
                trace_postcopy_pause_continued();
                return MIG_THR_ERR_RECOVERED;
            } else {
                /*
                 * Something went wrong during the recovery; let's
                 * pause again. Pausing is always better than throwing
                 * data away.
                 */
                continue;
            }
        } else {
            /* This is not right... Time to quit. */
            return MIG_THR_ERR_FATAL;
        }
    }
}

static MigThrError migration_detect_error(MigrationState *s)
{
    int ret;
    int state = s->state;
    Error *local_error = NULL;

    if (state == MIGRATION_STATUS_CANCELLING ||
        state == MIGRATION_STATUS_CANCELLED) {
        /* End the migration, but don't set the state to failed */
        return MIG_THR_ERR_FATAL;
    }

    /*
     * Try to detect any file errors. Note that postcopy_qemufile_src will
     * be NULL when postcopy preempt is not enabled.
     */
    ret = qemu_file_get_error_obj_any(s->to_dst_file,
                                      s->postcopy_qemufile_src,
                                      &local_error);
    if (!ret) {
        /* Everything is fine */
        assert(!local_error);
        return MIG_THR_ERR_NONE;
    }

    if (local_error) {
        migrate_set_error(s, local_error);
        error_free(local_error);
    }

    if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /*
         * For postcopy, we allow the network to be down for a
         * while. After that, it can be continued by a
         * recovery phase.
         */
        return postcopy_pause(s);
    } else {
        /*
         * For precopy (or postcopy with error outside IO), we fail
         * immediately.
         */
        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
        trace_migration_thread_file_err();

        /* Time to stop the migration, now. */
        return MIG_THR_ERR_FATAL;
    }
}

/* How many bytes have we transferred since the beginning of the migration */
static uint64_t migration_total_bytes(MigrationState *s)
{
    return qemu_file_total_transferred(s->to_dst_file) +
        ram_counters.multifd_bytes;
}
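
/*
 * Fill in the final statistics (total_time, downtime for precopy, and
 * mbps) once the migration has completed.
 */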
static void migration_calculate_complete(MigrationState *s)
{
    uint64_t bytes = migration_total_bytes(s);
    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t transfer_time;

    s->total_time = end_time - s->start_time;
    if (!s->downtime) {
        /*
         * It's still not set, so we are precopy migration. For
         * postcopy, downtime is calculated during postcopy_start().
         */
        s->downtime = end_time - s->downtime_start;
    }

    transfer_time = s->total_time - s->setup_time;
    if (transfer_time) {
        s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
    }
}

static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields at the same time to avoid mismatched
     * info leading to wrong speed calculations.
     */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->iteration_initial_bytes = migration_total_bytes(s);
    s->iteration_initial_pages = ram_get_total_transferred_pages();
}
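
/*
 * Recompute bandwidth, mbps, pages_per_second and the threshold_size
 * used for convergence checks. Runs at most once per BUFFER_DELAY ms;
 * it also resets the rate limit counter and the iteration baselines.
 */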
static void migration_update_counters(MigrationState *s,
                                      int64_t current_time)
{
    uint64_t transferred, transferred_pages, time_spent;
    uint64_t current_bytes; /* bytes transferred since the beginning */
    double bandwidth;

    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
        return;
    }

    current_bytes = migration_total_bytes(s);
    transferred = current_bytes - s->iteration_initial_bytes;
    time_spent = current_time - s->iteration_start_time;
    bandwidth = (double)transferred / time_spent;
    s->threshold_size = bandwidth * s->parameters.downtime_limit;

    s->mbps = (((double) transferred * 8.0) /
               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;

    transferred_pages = ram_get_total_transferred_pages() -
                            s->iteration_initial_pages;
    s->pages_per_second = (double) transferred_pages /
                             (((double) time_spent / 1000.0));

    /*
     * If we haven't sent anything, we don't want to
     * recalculate. 10000 is a small enough number for our purposes
     */
    if (ram_counters.dirty_pages_rate && transferred > 10000) {
        s->expected_downtime = ram_counters.remaining / bandwidth;
    }

    qemu_file_reset_rate_limit(s->to_dst_file);

    update_iteration_initial_status(s);

    trace_migrate_transferred(transferred, time_spent,
                              bandwidth, s->threshold_size);
}

/* Migration thread iteration status */
typedef enum {
    MIG_ITERATE_RESUME, /* Resume current iteration */
    MIG_ITERATE_SKIP,   /* Skip current iteration */
    MIG_ITERATE_BREAK,  /* Break the loop */
} MigIterateState;

/*
 * Perform one iteration of the migration loop. Returns MIG_ITERATE_RESUME
 * to continue iterating, MIG_ITERATE_SKIP to restart the loop immediately
 * (e.g. after switching to postcopy), or MIG_ITERATE_BREAK once migration
 * has completed.
 */
static MigIterateState migration_iteration_run(MigrationState *s)
{
    uint64_t must_precopy, can_postcopy;
    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;

    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
    uint64_t pending_size = must_precopy + can_postcopy;

    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);

    if (must_precopy <= s->threshold_size) {
        qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
        pending_size = must_precopy + can_postcopy;
        trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
    }

    if (!pending_size || pending_size < s->threshold_size) {
        trace_migration_thread_low_pending(pending_size);
        migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    /* Still a significant amount to transfer */
    if (!in_postcopy && must_precopy <= s->threshold_size &&
        qatomic_read(&s->start_postcopy)) {
        if (postcopy_start(s)) {
            error_report("%s: postcopy failed to start", __func__);
        }
        return MIG_ITERATE_SKIP;
    }

    /* Just another iteration step */
    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
    return MIG_ITERATE_RESUME;
}
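
/*
 * Runs once the migration thread leaves its main loop: compute the final
 * stats on success, restart or keep the VM stopped depending on the end
 * state, hand off to COLO when enabled, and schedule the fd cleanup
 * bottom half.
 */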
static void migration_iteration_finish(MigrationState *s)
{
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
        break;
    case MIGRATION_STATUS_COLO:
        if (!migrate_colo_enabled()) {
            error_report("%s: critical error: calling COLO code without "
                         "COLO enabled", __func__);
        }
        migrate_start_colo_process(s);
        s->vm_was_running = true;
        /* Fallthrough */
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        if (s->vm_was_running) {
            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                vm_start();
            }
        } else {
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }
    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
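
/*
 * Background-snapshot counterpart of migration_iteration_finish(): only
 * the final stats and the fd cleanup scheduling are needed here.
 */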
static void bg_migration_iteration_finish(MigrationState *s)
{
    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        break;

    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }

    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}

/*
 * Perform one iteration of the background snapshot loop. Returns
 * MIG_ITERATE_RESUME to continue, or MIG_ITERATE_BREAK once the whole
 * vmstate has been written out.
 */
static MigIterateState bg_migration_iteration_run(MigrationState *s)
{
    int res;

    res = qemu_savevm_state_iterate(s->to_dst_file, false);
    if (res > 0) {
        bg_migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}

void migration_make_urgent_request(void)
{
    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
}

void migration_consume_urgent_request(void)
{
    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
}

/* Returns true if the rate limiting was broken by an urgent request */
bool migration_rate_limit(void)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    MigrationState *s = migrate_get_current();

    bool urgent = false;
    migration_update_counters(s, now);
    if (qemu_file_rate_limit(s->to_dst_file)) {

        if (qemu_file_get_error(s->to_dst_file)) {
            return false;
        }
        /*
         * Wait for a delay to do rate limiting OR
         * something urgent to post the semaphore.
         */
        int ms = s->iteration_start_time + BUFFER_DELAY - now;
        trace_migration_rate_limit_pre(ms);
        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
            /*
             * We were woken by one or more urgent things but
             * the timedwait will have consumed one of them.
             * The service routine for the urgent wake will dec
             * the semaphore itself for each item it consumes,
             * so post back the one we just ate.
             */
            qemu_sem_post(&s->rate_limit_sem);
            urgent = true;
        }
        trace_migration_rate_limit_post(urgent);
    }
    return urgent;
}

/*
 * If failover devices are present, wait until they are completely
 * unplugged
 */
static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                    int new_state)
{
    if (qemu_savevm_state_guest_unplug_pending()) {
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);

        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
               qemu_savevm_state_guest_unplug_pending()) {
            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
        }
        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
            int timeout = 120; /* 30 seconds */
            /*
             * Migration has been cancelled, but as we have started an
             * unplug we must wait for it to finish so that we are able
             * to plug the card back in.
             */
            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
            }
            if (qemu_savevm_state_guest_unplug_pending() &&
                !qtest_enabled()) {
                warn_report("migration: partially unplugged device on "
                            "failure");
            }
        }

        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
    } else {
        migrate_set_state(&s->state, old_state, new_state);
    }
}

/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    MigrationThread *thread = NULL;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    MigThrError thr_error;
    bool urgent = false;

    thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());

    rcu_register_thread();

    object_ref(OBJECT(s));
    update_iteration_initial_status(s);

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure dst has it
     * opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo_enabled()) {
        /* Notify migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from, e.g., a network failure; reset all
             * the local variables. This is important to avoid
             * breaking the transferred_bytes and bandwidth calculations.
             */
            update_iteration_initial_status(s);
        }

        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    MigrationThreadDel(thread);
    return NULL;
}
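
/*
 * Bottom half that restarts the VM once UFFD write protection has been
 * armed; the downtime accounting for the background snapshot ends here.
 */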
static void bg_migration_vm_start_bh(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->vm_start_bh);
    s->vm_start_bh = NULL;

    vm_start();
    s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
}

/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of live migration mechanism
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel. Compared to the existing dirty page
 * logging migration, much less stream traffic is produced, resulting in
 * smaller snapshot images, simply because no page duplicates can get
 * into the stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, whereas
 * with the dirty page logging mechanism the saved snapshot is effectively
 * the state of the VM at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;

    rcu_register_thread();
    object_ref(OBJECT(s));

    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save vmstate for the moment when migration has been
     * initiated but also we want to save RAM content while VM is running.
     * The RAM content should appear first in the vmstate. So, we first
     * stash the non-RAM part of the vmstate to the temporary buffer,
     * then write RAM part of the vmstate to the migration stream
     * with vCPUs running and, finally, write stashed non-RAM part of
     * the vmstate from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
    object_unref(OBJECT(s->bioc));

    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    qemu_savevm_state_header(s->to_dst_file);
    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();

    /*
     * If the VM is currently in a suspended state, then, to make a valid
     * runstate transition in vm_stop_force_state() we need to wake it up.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    s->vm_was_running = runstate_is_running();

    if (global_state_store()) {
        goto fail;
    }
    /* Forcibly stop VM before saving state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
    }
    /*
     * Put vCPUs in sync with shadow context structures, then
     * save their state to channel-buffer along with devices.
     */
    cpu_synchronize_all_states();
    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;

    /*
     * Start VM from BH handler to avoid write-fault lock here.
     * UFFD-WP protection for the whole RAM is already enabled so
     * calling VM state change notifiers from vm_start() would initiate
     * writes to virtio VQs memory which is in write-protected region.
     */
    s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
    qemu_bh_schedule(s->vm_start_bh);

    qemu_mutex_unlock_iothread();

    while (migration_is_active(s)) {
        MigIterateState iter_state = bg_migration_iteration_run(s);
        if (iter_state == MIG_ITERATE_SKIP) {
            continue;
        } else if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        qemu_mutex_unlock_iothread();
    }

    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}
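
/*
 * Called once the outgoing channel is established (or re-established for
 * a postcopy recovery): applies the bandwidth limit, opens the return
 * path when needed, and either spawns the live_migration/bg_snapshot
 * thread or wakes up a paused postcopy instead of starting a new thread.
 */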
void migrate_fd_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    int64_t rate_limit;
    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile if migration completes successfully, there won't be an
     * error dumped when calling migrate_fd_cleanup().
     */
    migrate_error_free(s);

    s->expected_downtime = s->parameters.downtime_limit;
    if (resume) {
        assert(s->cleanup_bh);
    } else {
        assert(!s->cleanup_bh);
        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
    }
    if (error_in) {
        migrate_fd_error(s, error_in);
        if (resume) {
            /*
             * Don't do cleanup for resume if channel is invalid, but only dump
             * the error. We wait for another channel connect from the user.
             * The error_report still gives HMP user a hint on what failed.
             * It's normally done in migrate_fd_cleanup(), but call it here
             * explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migrate_fd_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = s->parameters.max_postcopy_bandwidth /
            XFER_LIMIT_RATIO;
    } else {
        /* This is a fresh new migration */
        rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;

        /* Notify before starting migration thread */
        notifier_list_notify(&migration_state_notifiers, s);
    }

    qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
    qemu_file_set_blocking(s->to_dst_file, true);

    /*
     * Open the return path. For postcopy, it is used exclusively. For
     * precopy, QEMU uses the return path only if the user specified the
     * "return-path" capability.
     */
    if (migrate_postcopy_ram() || migrate_use_return_path()) {
        if (open_return_path_on_source(s, !resume)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    /*
     * This needs to be done before resuming a postcopy. Note: for newer
     * QEMUs we will delay the channel creation until postcopy_start(), to
     * avoid disorder of channel creations.
     */
    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
        postcopy_preempt_setup(s);
    }

    if (resume) {
        /* Wakeup the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }

    if (multifd_save_setup(&local_err) != 0) {
        error_report_err(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "bg_snapshot",
                           bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, "live_migration",
                           migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
}

void migration_global_dump(Monitor *mon)
{
    MigrationState *ms = migrate_get_current();

    monitor_printf(mon, "globals:\n");
    monitor_printf(mon, "store-global-state: %s\n",
                   ms->store_global_state ? "on" : "off");
    monitor_printf(mon, "only-migratable: %s\n",
                   only_migratable ? "on" : "off");
    monitor_printf(mon, "send-configuration: %s\n",
                   ms->send_configuration ? "on" : "off");
    monitor_printf(mon, "send-section-footer: %s\n",
                   ms->send_section_footer ? "on" : "off");
    monitor_printf(mon, "decompress-error-check: %s\n",
                   ms->decompress_error_check ? "on" : "off");
    monitor_printf(mon, "clear-bitmap-shift: %u\n",
                   ms->clear_bitmap_shift);
}

#define DEFINE_PROP_MIG_CAP(name, x) \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
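
/*
 * QOM properties of the migration object: tunable defaults for the
 * migration parameters (many still carrying the experimental "x-"
 * prefix), followed by the capability switches declared via
 * DEFINE_PROP_MIG_CAP above.
 */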
static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),
    DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
                     decompress_error_check, true),
    DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                      clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
    DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
                     preempt_pre_7_2, false),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
                     parameters.compress_wait_thread, true),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                      parameters.throttle_trigger_threshold,
                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
                     parameters.cpu_throttle_tailslow, false),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                     parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                       parameters.downtime_limit,
                       DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                       parameters.x_checkpoint_delay,
                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_UINT8("multifd-channels", MigrationState,
                      parameters.multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
                                    parameters.multifd_compression,
                                    DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
    DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
                      parameters.multifd_zlib_level,
                      DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
    DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
                      parameters.multifd_zstd_level,
                      DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
    DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                     parameters.xbzrle_cache_size,
                     DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
    DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
                     parameters.max_postcopy_bandwidth,
                     DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
    DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
                      parameters.max_cpu_throttle,
                      DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
    DEFINE_PROP_SIZE("announce-initial", MigrationState,
                     parameters.announce_initial,
                     DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
    DEFINE_PROP_SIZE("announce-max", MigrationState,
                     parameters.announce_max,
                     DEFAULT_MIGRATE_ANNOUNCE_MAX),
    DEFINE_PROP_SIZE("announce-rounds", MigrationState,
                     parameters.announce_rounds,
                     DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
    DEFINE_PROP_SIZE("announce-step", MigrationState,
                     parameters.announce_step,
                     DEFAULT_MIGRATE_ANNOUNCE_STEP),
    DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
    DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
    DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),

    /* Migration capabilities */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
                        MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
    DEFINE_PROP_MIG_CAP("x-background-snapshot",
                        MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
#ifdef CONFIG_LINUX
    DEFINE_PROP_MIG_CAP("x-zero-copy-send",
                        MIGRATION_CAPABILITY_ZERO_COPY_SEND),
#endif

    DEFINE_PROP_END_OF_LIST(),
};

static void migration_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    device_class_set_props(dc, migration_properties);
}

static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_sem_destroy(&ms->pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
    error_free(ms->error);
}
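
/*
 * Set the defaults for a fresh migration object: initial state and
 * stats, parameter defaults with their has_* flags for parameter
 * checks, and the synchronization primitives used by the migration
 * threads.
 */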
static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_sem_init(&ms->pause_sem, 0);
    qemu_mutex_init(&ms->error_mutex);

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_compress_wait_thread = true;
    params->has_decompress_threads = true;
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_multifd_channels = true;
    params->has_multifd_compression = true;
    params->has_multifd_zlib_level = true;
    params->has_multifd_zstd_level = true;
    params->has_xbzrle_cache_size = true;
    params->has_max_postcopy_bandwidth = true;
    params->has_max_cpu_throttle = true;
    params->has_announce_initial = true;
    params->has_announce_max = true;
    params->has_announce_rounds = true;
    params->has_announce_step = true;

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
}

/*
 * Return true if the checks pass, false otherwise. On failure the
 * error is put inside errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    /* Assuming all off */
    bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
    int i;

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (ms->enabled_capabilities[i]) {
            QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
        }
    }

    ret = migrate_caps_check(cap_list, head, errp);

    /* It works with head == NULL */
    qapi_free_MigrationCapabilityStatusList(head);

    return ret;
}

static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);