migration.c

/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "migration/blocker.h"
#include "exec.h"
#include "fd.h"
#include "socket.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qnull.h"
#include "qemu/rcu.h"
#include "block.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
#include "trace.h"
#include "exec/target_page.h"
#include "io/channel-buffer.h"
#include "io/channel-tls.h"
#include "migration/colo.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
#include "sysemu/cpus.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"
#include "ui/qemu-spice.h"

#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY 100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
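
/*
 * Illustrative note (not part of the original file): the outgoing rate
 * limiter hands out bandwidth in BUFFER_DELAY-millisecond chunks, so a
 * cap of B bytes/s allows roughly B / XFER_LIMIT_RATIO bytes per chunk;
 * e.g. MAX_THROTTLE (128 MiB/s) works out to about 12.8 MiB per 100 ms
 * chunk.
 */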

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.*/
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
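
/*
 * Illustrative note (not part of the original file), assuming the
 * announce timer adds ANNOUNCE_STEP ms per round and clamps the delay
 * at ANNOUNCE_MAX: the defaults above space the five announcement
 * rounds roughly 50, 150, 250, 350 and 450 ms apart.
 */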

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};

/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                   /* Capability set size */
    MigrationCapability caps[]; /* Variadic array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {    \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ } \
    }
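
/*
 * Illustrative note (not part of the original file): the sizeof on the
 * compound literal counts the variadic arguments at compile time, so
 * e.g. INITIALIZE_MIGRATE_CAPS_SET(foo, MIGRATION_CAPABILITY_XBZRLE,
 * MIGRATION_CAPABILITY_MULTIFD) defines 'foo' with .size == 2 and a
 * two-element .caps array.
 */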

/* Background-snapshot compatibility check list */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID,
    MIGRATION_CAPABILITY_ZERO_COPY_SEND);

/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */
static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

static bool migration_needs_multiple_sockets(void)
{
    return migrate_use_multifd() || migrate_postcopy_preempt();
}

static bool uri_supports_multi_channels(const char *uri)
{
    return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) ||
           strstart(uri, "vsock:", NULL);
}

static bool
migration_channels_and_uri_compatible(const char *uri, Error **errp)
{
    if (migration_needs_multiple_sockets() &&
        !uri_supports_multi_channels(uri)) {
        error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
        return false;
    }

    return true;
}
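
/*
 * Note (added): the comparison below yields -1, 0 or 1 without the
 * signed-overflow hazard of simply returning a - b on pointer-sized
 * values.
 */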
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
    uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;

    return (a > b) - (a < b);
}

void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_mutex_init(&current_incoming->postcopy_prio_thread_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fast_load, 0);
    qemu_sem_init(&current_incoming->postcopy_qemufile_dst_done, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    migrate_fd_cancel(current_migration);
}

void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exits, the COLO thread may be waiting
     * on a semaphore, so wake up the COLO thread before shutting
     * migration down.
     */
    colo_shutdown();

    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss is never considered
     * serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object has been created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
{
    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }

    if (mis->transport_cleanup) {
        mis->transport_cleanup(mis->transport_data);
        mis->transport_data = mis->transport_cleanup = NULL;
    }
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    multifd_load_cleanup();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    if (mis->postcopy_qemufile_dst) {
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that the QEMU file hit an error during the send */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}
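
/*
 * Illustrative note (not part of the original file): as the code above
 * shows, every return-path message is framed as
 *
 *   [type: be16][len: be16][payload: len bytes]
 *
 * with the payload layout per message type described in the
 * mig_rp_message_type enum.
 */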

/* Request one page from the source VM at the given start address.
 *   rb: the RAMBlock to request the page in
 *   start: address offset within the RB
 *   len: length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                      RAMBlock *rb, ram_addr_t start)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    size_t len = qemu_ram_pagesize(rb);
    enum mig_rp_message_type msg_type;
    const char *rbname;
    int rbname_len;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    /*
     * We maintain the last ramblock that we requested a page for.  Note
     * that we don't need locking because this function will only be
     * called within the postcopy ram fault thread.
     */
    if (rb != mis->last_rb) {
        mis->last_rb = rb;

        rbname = qemu_ram_get_idstr(rb);
        rbname_len = strlen(rbname);

        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
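
/*
 * Illustrative note (not part of the original file): the payload built
 * above is therefore
 *
 *   MIG_RP_MSG_REQ_PAGES:    [start: be64][len: be32]
 *   MIG_RP_MSG_REQ_PAGES_ID: [start: be64][len: be32][namelen: u8][name]
 *
 * and the block name is only re-sent when the request targets a
 * different RAMBlock than the previous one.
 */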

int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need the
     * lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}

static bool migration_colo_enabled;

bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    ram_block_discard_disable(false);
    migration_colo_enabled = false;
}

int migration_incoming_enable_colo(void)
{
    if (ram_block_discard_disable(true)) {
        error_report("COLO: cannot disable RAM discard");
        return -EBUSY;
    }
    migration_colo_enabled = true;
    return 0;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    QAPI_LIST_PREPEND(mis->socket_address_list,
                      QAPI_CLONE(SocketAddress, address));
}

static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
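
/*
 * Illustrative note (not part of the original file): example URIs
 * accepted by the dispatch above (e.g. via -incoming):
 *
 *   tcp:0.0.0.0:4444     listen on a TCP port
 *   unix:/tmp/mig.sock   listen on a Unix domain socket
 *   vsock:3:4444         listen on a vsock cid:port
 *   exec:cat mig.img     read the stream from a command's stdout
 *   fd:42                use an already-open file descriptor
 *   rdma:host:port       RDMA transport (only with CONFIG_RDMA)
 */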

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    multifd_load_shutdown();

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void coroutine_fn
process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);

    multifd_load_cleanup();

    exit(EXIT_FAILURE);
}

/**
 * migration_incoming_setup: Setup incoming migration
 * @f: file for main migration channel
 * @errp: where to put errors
 *
 * Returns: %true on success, %false on error.
 */
static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
    return true;
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * remaining threads keep waiting), so that we can receive
         * commands from the source now, and answer them if needed.
         * The remaining threads will be woken up later, once we are
         * sure that the source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f, Error **errp)
{
    if (!migration_incoming_setup(f, errp)) {
        return;
    }
    if (postcopy_try_recover()) {
        return;
    }
    migration_incoming_process();
}

/*
 * Returns true when we want to start a new incoming migration process,
 * false otherwise.
 */
static bool migration_should_start_incoming(bool main_channel)
{
    /* Multifd doesn't start unless all channels are established */
    if (migrate_use_multifd()) {
        return migration_has_all_channels();
    }

    /* Preempt channel only starts when the main channel is created */
    if (migrate_postcopy_preempt()) {
        return main_channel;
    }

    /*
     * For all the other types of migration, we should only reach here
     * when it's the main channel that's being created, and we should
     * always proceed with this channel.
     */
    assert(main_channel);
    return true;
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    QEMUFile *f;
    bool default_channel = true;
    uint32_t channel_magic = 0;
    int ret = 0;

    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
        /*
         * With multiple channels, it is possible that we receive channels
         * out of order on the destination side, causing an incorrect
         * mapping of source channels on the destination side. Check the
         * channel MAGIC to decide the type of channel. Note this is best
         * effort: the postcopy preempt channel does not send any magic
         * number, so avoid it for postcopy live migration. Also, TLS live
         * migration already does a TLS handshake while initializing the
         * main channel, so with TLS this issue does not arise.
         */
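        /*
         * Note (added): QEMU_VM_FILE_MAGIC is the "QEVM" magic
         * (0x5145564d) that opens the main vmstate stream, so peeking
         * the first four bytes is enough to tell the main channel
         * apart from multifd channels.
         */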
        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
                                          sizeof(channel_magic), &local_err);
        if (ret != 0) {
            error_propagate(errp, local_err);
            return;
        }
        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

    if (multifd_load_setup(errp) != 0) {
        error_setg(errp, "Failed to setup multifd channels");
        return;
    }

    if (default_channel) {
        f = qemu_file_new_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }
    } else {
        /* Multiple connections */
        assert(migration_needs_multiple_sockets());
        if (migrate_use_multifd()) {
            multifd_recv_new_channel(ioc, &local_err);
        } else {
            assert(migrate_postcopy_preempt());
            f = qemu_file_new_input(ioc);
            postcopy_preempt_new_channel(mis, f);
        }
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (migration_should_start_incoming(default_channel)) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!mis->from_src_file) {
        return false;
    }

    if (migrate_use_multifd()) {
        return multifd_recv_all_channels_created();
    }

    if (migrate_postcopy_preempt()) {
        return mis->postcopy_qemufile_dst != NULL;
    }

    return true;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP.  A non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the length of
     * the idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (fault thread is still paused),
     * and it is OK even without taking the mutex. However the best
     * way is to take the lock before sending the message header, and
     * release the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}
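
/*
 * Illustrative note (not part of the original file): the RECV_BITMAP
 * header built above is [namelen: u8][idstr: namelen bytes]; the bitmap
 * itself follows separately via ramblock_recv_bitmap_send().
 */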
  794. void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
  795. {
  796. uint32_t buf;
  797. buf = cpu_to_be32(value);
  798. migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
  799. }
  800. MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
  801. {
  802. MigrationCapabilityStatusList *head = NULL, **tail = &head;
  803. MigrationCapabilityStatus *caps;
  804. MigrationState *s = migrate_get_current();
  805. int i;
  806. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  807. #ifndef CONFIG_LIVE_BLOCK_MIGRATION
  808. if (i == MIGRATION_CAPABILITY_BLOCK) {
  809. continue;
  810. }
  811. #endif
  812. caps = g_malloc0(sizeof(*caps));
  813. caps->capability = i;
  814. caps->state = s->enabled_capabilities[i];
  815. QAPI_LIST_APPEND(tail, caps);
  816. }
  817. return head;
  818. }
  819. MigrationParameters *qmp_query_migrate_parameters(Error **errp)
  820. {
  821. MigrationParameters *params;
  822. MigrationState *s = migrate_get_current();
  823. /* TODO use QAPI_CLONE() instead of duplicating it inline */
  824. params = g_malloc0(sizeof(*params));
  825. params->has_compress_level = true;
  826. params->compress_level = s->parameters.compress_level;
  827. params->has_compress_threads = true;
  828. params->compress_threads = s->parameters.compress_threads;
  829. params->has_compress_wait_thread = true;
  830. params->compress_wait_thread = s->parameters.compress_wait_thread;
  831. params->has_decompress_threads = true;
  832. params->decompress_threads = s->parameters.decompress_threads;
  833. params->has_throttle_trigger_threshold = true;
  834. params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
  835. params->has_cpu_throttle_initial = true;
  836. params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
  837. params->has_cpu_throttle_increment = true;
  838. params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
  839. params->has_cpu_throttle_tailslow = true;
  840. params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
  841. params->tls_creds = g_strdup(s->parameters.tls_creds);
  842. params->tls_hostname = g_strdup(s->parameters.tls_hostname);
  843. params->tls_authz = g_strdup(s->parameters.tls_authz ?
  844. s->parameters.tls_authz : "");
  845. params->has_max_bandwidth = true;
  846. params->max_bandwidth = s->parameters.max_bandwidth;
  847. params->has_downtime_limit = true;
  848. params->downtime_limit = s->parameters.downtime_limit;
  849. params->has_x_checkpoint_delay = true;
  850. params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
  851. params->has_block_incremental = true;
  852. params->block_incremental = s->parameters.block_incremental;
  853. params->has_multifd_channels = true;
  854. params->multifd_channels = s->parameters.multifd_channels;
  855. params->has_multifd_compression = true;
  856. params->multifd_compression = s->parameters.multifd_compression;
  857. params->has_multifd_zlib_level = true;
  858. params->multifd_zlib_level = s->parameters.multifd_zlib_level;
  859. params->has_multifd_zstd_level = true;
  860. params->multifd_zstd_level = s->parameters.multifd_zstd_level;
  861. params->has_xbzrle_cache_size = true;
  862. params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
  863. params->has_max_postcopy_bandwidth = true;
  864. params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
  865. params->has_max_cpu_throttle = true;
  866. params->max_cpu_throttle = s->parameters.max_cpu_throttle;
  867. params->has_announce_initial = true;
  868. params->announce_initial = s->parameters.announce_initial;
  869. params->has_announce_max = true;
  870. params->announce_max = s->parameters.announce_max;
  871. params->has_announce_rounds = true;
  872. params->announce_rounds = s->parameters.announce_rounds;
  873. params->has_announce_step = true;
  874. params->announce_step = s->parameters.announce_step;
  875. if (s->parameters.has_block_bitmap_mapping) {
  876. params->has_block_bitmap_mapping = true;
  877. params->block_bitmap_mapping =
  878. QAPI_CLONE(BitmapMigrationNodeAliasList,
  879. s->parameters.block_bitmap_mapping);
  880. }
  881. return params;
  882. }
  883. void qmp_client_migrate_info(const char *protocol, const char *hostname,
  884. bool has_port, int64_t port,
  885. bool has_tls_port, int64_t tls_port,
  886. const char *cert_subject,
  887. Error **errp)
  888. {
  889. if (strcmp(protocol, "spice") == 0) {
  890. if (!qemu_using_spice(errp)) {
  891. return;
  892. }
  893. if (!has_port && !has_tls_port) {
  894. error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port");
  895. return;
  896. }
  897. if (qemu_spice.migrate_info(hostname,
  898. has_port ? port : -1,
  899. has_tls_port ? tls_port : -1,
  900. cert_subject)) {
  901. error_setg(errp, "Could not set up display for migration");
  902. return;
  903. }
  904. return;
  905. }
  906. error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'");
  907. }
  908. AnnounceParameters *migrate_announce_params(void)
  909. {
  910. static AnnounceParameters ap;
  911. MigrationState *s = migrate_get_current();
  912. ap.initial = s->parameters.announce_initial;
  913. ap.max = s->parameters.announce_max;
  914. ap.rounds = s->parameters.announce_rounds;
  915. ap.step = s->parameters.announce_step;
  916. return &ap;
  917. }
  918. /*
  919. * Return true if we're already in the middle of a migration
  920. * (i.e. any of the active or setup states)
  921. */
  922. bool migration_is_setup_or_active(int state)
  923. {
  924. switch (state) {
  925. case MIGRATION_STATUS_ACTIVE:
  926. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  927. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  928. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  929. case MIGRATION_STATUS_SETUP:
  930. case MIGRATION_STATUS_PRE_SWITCHOVER:
  931. case MIGRATION_STATUS_DEVICE:
  932. case MIGRATION_STATUS_WAIT_UNPLUG:
  933. case MIGRATION_STATUS_COLO:
  934. return true;
  935. default:
  936. return false;
  937. }
  938. }
  939. bool migration_is_running(int state)
  940. {
  941. switch (state) {
  942. case MIGRATION_STATUS_ACTIVE:
  943. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  944. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  945. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  946. case MIGRATION_STATUS_SETUP:
  947. case MIGRATION_STATUS_PRE_SWITCHOVER:
  948. case MIGRATION_STATUS_DEVICE:
  949. case MIGRATION_STATUS_WAIT_UNPLUG:
  950. case MIGRATION_STATUS_CANCELLING:
  951. return true;
  952. default:
  953. return false;
  954. }
  955. }
  956. static bool migrate_show_downtime(MigrationState *s)
  957. {
  958. return (s->state == MIGRATION_STATUS_COMPLETED) || migration_in_postcopy();
  959. }
  960. static void populate_time_info(MigrationInfo *info, MigrationState *s)
  961. {
  962. info->has_status = true;
  963. info->has_setup_time = true;
  964. info->setup_time = s->setup_time;
  965. if (s->state == MIGRATION_STATUS_COMPLETED) {
  966. info->has_total_time = true;
  967. info->total_time = s->total_time;
  968. } else {
  969. info->has_total_time = true;
  970. info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
  971. s->start_time;
  972. }
  973. if (migrate_show_downtime(s)) {
  974. info->has_downtime = true;
  975. info->downtime = s->downtime;
  976. } else {
  977. info->has_expected_downtime = true;
  978. info->expected_downtime = s->expected_downtime;
  979. }
  980. }
static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = stat64_get(&ram_atomic_counters.transferred);
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate);
    /* legacy value. It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = stat64_get(&ram_atomic_counters.normal);
    info->ram->normal_bytes = info->ram->normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->dirty_sync_missed_zero_copy =
        ram_counters.dirty_sync_missed_zero_copy;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = ram_counters.precopy_bytes;
    info->ram->downtime_bytes = ram_counters.downtime_bytes;
    info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);

    if (migrate_use_xbzrle()) {
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}
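
/*
 * Fill in @info with the source side's view of the migration:
 * blockers, per-state statistics and the current status.
 */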
static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    int state = qatomic_read(&s->state);
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked:
     * a) devices marked in VMState as non-migratable, and
     * b) explicit migration blockers.
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);
    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = state;
}

typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,
    WT_SUPPORT_AVAILABLE,
    WT_SUPPORT_COMPATIBLE
} WriteTrackingSupport;

static
WriteTrackingSupport migrate_query_write_tracking(void)
{
    /* Check if kernel supports required UFFD features */
    if (!ram_write_tracking_available()) {
        return WT_SUPPORT_ABSENT;
    }
    /*
     * Check if current memory configuration is
     * compatible with required UFFD features.
     */
    if (!ram_write_tracking_compatible()) {
        return WT_SUPPORT_AVAILABLE;
    }

    return WT_SUPPORT_COMPATIBLE;
}

/**
 * @migrate_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        /*
         * This check is reasonably expensive, so only do it when the
         * capability is being set for the first time; also, only the
         * destination needs special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /*
             * postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
        WriteTrackingSupport wt_support;
        int idx;
        /*
         * Check if 'background-snapshot' capability is supported by
         * host kernel and compatible with guest memory configuration.
         */
        wt_support = migrate_query_write_tracking();
        if (wt_support < WT_SUPPORT_AVAILABLE) {
            error_setg(errp, "Background-snapshot is not supported by host kernel");
            return false;
        }
        if (wt_support < WT_SUPPORT_COMPATIBLE) {
            error_setg(errp, "Background-snapshot is not compatible "
                       "with guest memory configuration");
            return false;
        }

        /*
         * Check if there are any migration capabilities
         * incompatible with 'background-snapshot'.
         */
        for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
            int incomp_cap = check_caps_background_snapshot.caps[idx];
            if (cap_list[incomp_cap]) {
                error_setg(errp,
                           "Background-snapshot is not compatible with %s",
                           MigrationCapability_str(incomp_cap));
                return false;
            }
        }
    }

#ifdef CONFIG_LINUX
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
        (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
         cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
         cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
         migrate_multifd_compression() ||
         migrate_use_tls())) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#else
    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
        error_setg(errp,
                   "Zero copy currently only available on Linux");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) {
        if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
            error_setg(errp, "Postcopy preempt requires postcopy-ram");
            return false;
        }

        /*
         * Preempt mode requires urgent pages to be sent in separate
         * channel, OTOH compression logic will disorder all pages into
         * different compression channels, which is not compatible with the
         * preempt assumptions on channel assignments.
         */
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Postcopy preempt not compatible with compress");
            return false;
        }
    }

    if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            error_setg(errp, "Multifd is not compatible with compress");
            return false;
        }
    }

    return true;
}
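
/*
 * Fill in @info with the destination side's view of the migration:
 * listening socket addresses and the incoming status.
 */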
static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}
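
/*
 * Handler for the 'query-migrate' QMP command.  For example
 * (the returned fields depend on the migration state):
 *
 *   -> { "execute": "query-migrate" }
 *   <- { "return": { "status": "active", ... } }
 */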
MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}
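
/*
 * Handler for the 'migrate-set-capabilities' QMP command.  For example:
 *
 *   -> { "execute": "migrate-set-capabilities",
 *        "arguments": { "capabilities":
 *            [ { "capability": "xbzrle", "state": true } ] } }
 *   <- { "return": {} }
 */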
void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}

/*
 * Check whether the parameters are valid. Error will be put into errp
 * (if provided). Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_throttle_trigger_threshold &&
        (params->throttle_trigger_threshold < 1 ||
         params->throttle_trigger_threshold > 100)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "throttle_trigger_threshold",
                   "an integer in the range of 1 to 100");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_bandwidth",
                   "an integer in the range of 0 to "stringify(SIZE_MAX)
                   " bytes/second");
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "downtime_limit",
                   "an integer in the range of 0 to "
                   stringify(MAX_MIGRATE_DOWNTIME)" ms");
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "a value between 1 and 255");
        return false;
    }

    if (params->has_multifd_zlib_level &&
        (params->multifd_zlib_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
                   "a value between 0 and 9");
        return false;
    }

    if (params->has_multifd_zstd_level &&
        (params->multifd_zstd_level > 20)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
                   "a value between 0 and 20");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "a power of two no less than the target page size");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "a value between 0 and 100000");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "a value between 0 and 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "a value between 1 and 10000");
        return false;
    }

    if (params->has_block_bitmap_mapping &&
        !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
        error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
        return false;
    }

#ifdef CONFIG_LINUX
    if (migrate_use_zero_copy_send() &&
        ((params->has_multifd_compression && params->multifd_compression) ||
         (params->tls_creds && *params->tls_creds))) {
        error_setg(errp,
                   "Zero copy only available for non-compressed non-TLS multifd migration");
        return false;
    }
#endif

    return true;
}
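
/*
 * Apply @params on top of a copy of the current parameters into @dest,
 * without touching the live configuration; used to validate the combined
 * result before committing it.
 */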
static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }

    if (params->tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}
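
/*
 * Commit @params to the live migration parameters, applying side
 * effects (rate limits, cache resize, COLO notification) where needed.
 */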
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    if (params->tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}
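
/*
 * Handler for the 'migrate-set-parameters' QMP command.  For example
 * (illustrative argument values):
 *
 *   -> { "execute": "migrate-set-parameters",
 *        "arguments": { "max-bandwidth": 33554432 } }
 *   <- { "return": {} }
 */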
void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        qobject_unref(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        qobject_unref(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params, errp);
}

void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * We don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    qatomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}

static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
                                                  bool state)
{
    MigrationCapabilityStatus *cap;

    cap = g_new0(MigrationCapabilityStatus, 1);
    cap->capability = index;
    cap->state = state;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap = NULL;

    QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}
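
/*
 * Tear down the source side of a finished or failed migration: join the
 * migration thread, close the outgoing files and notify state listeners.
 * Runs in the main thread, via the cleanup BH scheduled below.
 */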
static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    g_free(s->hostname);
    s->hostname = NULL;
    json_writer_free(s->vmdesc);
    s->vmdesc = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    if (s->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
        qemu_fclose(s->postcopy_qemufile_src);
        s->postcopy_qemufile_src = NULL;
    }

    assert(!migration_is_active(s));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate. We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for the BH, because it may be called when
     * there are no other refs left.
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
    MigrationState *s = opaque;
    migrate_fd_cleanup(s);
    object_unref(OBJECT(s));
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (!s->error) {
        s->error = error_copy(error);
    }
}

static void migrate_error_free(MigrationState *s)
{
    QEMU_LOCK_GUARD(&s->error_mutex);
    if (s->error) {
        error_free(s->error);
        s->error = NULL;
    }
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}
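
/*
 * Cancel an outgoing migration: move the state machine to CANCELLING,
 * kick a paused migration out of its pause, and force pending I/O to
 * fail so a stuck sender cannot block the cancel.
 */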
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shut down the rp socket, which causes the rp thread to quit */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    do {
        old_state = s->state;
        if (!migration_is_running(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        return true;
    default:
        return false;
    }
}

bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}

bool migration_in_incoming_postcopy(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
}

bool migration_incoming_postcopy_advised(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

bool migration_in_bg_snapshot(void)
{
    MigrationState *s = migrate_get_current();

    return migrate_background_snapshot() &&
            migration_is_setup_or_active(s->state);
}
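
/*
 * Return true if no migration is in progress at all; migration
 * blockers may only be added while this holds (see
 * migrate_add_blocker_internal() below).
 */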
bool migration_is_idle(void)
{
    MigrationState *s = current_migration;

    if (!s) {
        return true;
    }

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
        return false;
    case MIGRATION_STATUS__MAX:
        g_assert_not_reached();
    }

    return false;
}

bool migration_is_active(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

void migrate_init(MigrationState *s)
{
    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->cleanup_bh = 0;
    s->vm_start_bh = 0;
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->pages_per_second = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;
    s->hostname = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->total_time = 0;
    s->vm_was_running = false;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
}

int migrate_add_blocker_internal(Error *reason, Error **errp)
{
    /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
    if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
        error_propagate_prepend(errp, error_copy(reason),
                                "disallowing migration blocker "
                                "(migration/snapshot in progress) for: ");
        return -EBUSY;
    }

    migration_blockers = g_slist_prepend(migration_blockers, reason);
    return 0;
}

int migrate_add_blocker(Error *reason, Error **errp)
{
    if (only_migratable) {
        error_propagate_prepend(errp, error_copy(reason),
                                "disallowing migration blocker "
                                "(--only-migratable) for: ");
        return -EACCES;
    }

    return migrate_add_blocker_internal(reason, errp);
}

void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}
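
/*
 * Handler for the 'migrate-incoming' QMP command; only valid once, and
 * only when QEMU was started with '-incoming defer'.  For example
 * (illustrative URI):
 *
 *   -> { "execute": "migrate-incoming",
 *        "arguments": { "uri": "tcp:0:4446" } }
 *   <- { "return": {} }
 */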
void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true;

    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }
    if (!runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "'-incoming' was not specified on the command line");
        return;
    }

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}

void qmp_migrate_recover(const char *uri, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    /*
     * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
     * callers (no one should ignore a recover failure); if someone does,
     * it's a programming error.
     */
    assert(errp);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
        error_setg(errp, "Migrate recover can only be run "
                   "when postcopy is paused.");
        return;
    }

    /* If there's an existing transport, release it */
    migration_incoming_transport_cleanup(mis);

    /*
     * Note that this call will never start a real migration; it will
     * only re-setup the migration stream and poke the existing migration
     * to continue using that newly established channel.
     */
    qemu_start_incoming_migration(uri, errp);
}

void qmp_migrate_pause(Error **errp)
{
    MigrationState *ms = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /* Source side, during postcopy */
        qemu_mutex_lock(&ms->qemu_file_lock);
        ret = qemu_file_shutdown(ms->to_dst_file);
        qemu_mutex_unlock(&ms->qemu_file_lock);
        if (ret) {
            error_setg(errp, "Failed to pause source migration");
        }
        return;
    }

    if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        ret = qemu_file_shutdown(mis->from_src_file);
        if (ret) {
            error_setg(errp, "Failed to pause destination migration");
        }
        return;
    }

    error_setg(errp, "migrate-pause is currently only supported "
               "during postcopy-active state");
}

bool migration_is_blocked(Error **errp)
{
    if (qemu_savevm_state_blocked(errp)) {
        return true;
    }

    if (migration_blockers) {
        error_propagate(errp, error_copy(migration_blockers->data));
        return true;
    }

    return false;
}

/* Returns true to continue migrating, or false if an error was detected */
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
                            bool resume, Error **errp)
{
    Error *local_err = NULL;

    if (resume) {
        if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
            error_setg(errp, "Cannot resume if there is no "
                       "paused migration");
            return false;
        }

        /*
         * Postcopy recovery won't work well with release-ram
         * capability since release-ram will drop the page buffer as
         * long as the page is put into the send buffer.  So if a
         * network failure happens, any page buffers that have not yet
         * reached the destination VM but have already been sent from
         * the source VM will be lost forever.  So refuse to let the
         * client resume such a postcopy migration.  Luckily release-ram
         * was designed to only be used when src and destination VMs
         * are on the same host, so it should be fine.
         */
        if (migrate_release_ram()) {
            error_setg(errp, "Postcopy recovery cannot work "
                       "when release-ram capability is set");
            return false;
        }

        /* This is a resume, skip init status */
        return true;
    }

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return false;
    }

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return false;
    }

    if (runstate_check(RUN_STATE_POSTMIGRATE)) {
        error_setg(errp, "Can't migrate the vm that was paused due to "
                   "previous migration");
        return false;
    }

    if (migration_is_blocked(errp)) {
        return false;
    }

    if (blk || blk_inc) {
        if (migrate_colo_enabled()) {
            error_setg(errp, "No disk migration is required in COLO mode");
            return false;
        }
        if (migrate_use_block() || migrate_use_block_incremental()) {
            error_setg(errp, "Command options are incompatible with "
                       "current migration capabilities");
            return false;
        }
        migrate_set_block_enabled(true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return false;
        }
        s->must_remove_block_options = true;
    }

    if (blk_inc) {
        migrate_set_block_incremental(s, true);
    }

    migrate_init(s);
    /*
     * Zero the ram_counters and compression_counters for a new
     * migration.
     */
    memset(&ram_counters, 0, sizeof(ram_counters));
    memset(&compression_counters, 0, sizeof(compression_counters));

    return true;
}
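
/*
 * Handler for the 'migrate' QMP command.  For example (illustrative URI):
 *
 *   -> { "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } }
 *   <- { "return": {} }
 */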
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 bool has_resume, bool resume, Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    const char *p = NULL;

    /* URI is not suitable for migration? */
    if (!migration_channels_and_uri_compatible(uri, errp)) {
        return;
    }

    if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
                         has_resume && resume, errp)) {
        /* Error detected, put into errp */
        return;
    }

    if (!(has_resume && resume)) {
        if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
            return;
        }
    }

    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        socket_start_outgoing_migration(s, p ? p : uri, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
    } else {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        block_cleanup_parameters(s);
        return;
    }

    if (local_err) {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        migrate_fd_error(s, local_err);
        error_propagate(errp, local_err);
        return;
    }
}

void qmp_migrate_cancel(Error **errp)
{
    migration_cancel(NULL);
}

void qmp_migrate_continue(MigrationStatus state, Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (s->state != state) {
        error_setg(errp, "Migration not in expected state: %s",
                   MigrationStatus_str(s->state));
        return;
    }
    qemu_sem_post(&s->pause_sem);
}

bool migrate_release_ram(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
}

bool migrate_postcopy_ram(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
}

bool migrate_postcopy(void)
{
    return migrate_postcopy_ram() || migrate_dirty_bitmaps();
}

bool migrate_auto_converge(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
}

bool migrate_zero_blocks(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}

bool migrate_postcopy_blocktime(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
}

bool migrate_use_compression(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
}

int migrate_compress_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_level;
}

int migrate_compress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_threads;
}

int migrate_compress_wait_thread(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.compress_wait_thread;
}

int migrate_decompress_threads(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.decompress_threads;
}

bool migrate_dirty_bitmaps(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
}

bool migrate_ignore_shared(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
}

bool migrate_validate_uuid(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
}

bool migrate_use_events(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
}

bool migrate_use_multifd(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
}

bool migrate_pause_before_switchover(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
}

int migrate_multifd_channels(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.multifd_channels;
}

MultiFDCompression migrate_multifd_compression(void)
{
    MigrationState *s;

    s = migrate_get_current();

    assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
    return s->parameters.multifd_compression;
}

int migrate_multifd_zlib_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.multifd_zlib_level;
}

int migrate_multifd_zstd_level(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.multifd_zstd_level;
}

#ifdef CONFIG_LINUX
bool migrate_use_zero_copy_send(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
}
#endif

int migrate_use_tls(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.tls_creds && *s->parameters.tls_creds;
}

int migrate_use_xbzrle(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
}

uint64_t migrate_xbzrle_cache_size(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.xbzrle_cache_size;
}

static int64_t migrate_max_postcopy_bandwidth(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.max_postcopy_bandwidth;
}

bool migrate_use_block(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
}

bool migrate_use_return_path(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
}

bool migrate_use_block_incremental(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->parameters.block_incremental;
}

bool migrate_background_snapshot(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
}

bool migrate_postcopy_preempt(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT];
}

/* migration thread support */

/*
 * Something bad happened to the RP stream, mark an error.
 * The caller shall print or trace something to indicate why.
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}
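
/*
 * Expected payload length (in bytes) for each return-path message
 * type; -1 means the length is variable and is validated per message.
 */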
static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};

/*
 * Process a request for pages received on the return path.  We're
 * allowed to send more than requested (e.g. to round to our page size)
 * and we don't need to send pages that have already been sent.
 */
static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
                                        ram_addr_t start, size_t len)
{
    long our_host_ps = qemu_real_host_page_size();

    trace_migrate_handle_rp_req_pages(rbname, start, len);

    /*
     * Since we currently insist on matching page sizes, just sanity check
     * we're being asked for whole host pages.
     */
    if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
        !QEMU_IS_ALIGNED(len, our_host_ps)) {
        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
                     " len: %zd", __func__, start, len);
        mark_source_rp_bad(ms);
        return;
    }

    if (ram_save_queue_pages(rbname, start, len)) {
        mark_source_rp_bad(ms);
    }
}

/* Return true to retry, false to quit */
static bool postcopy_pause_return_path_thread(MigrationState *s)
{
    trace_postcopy_pause_return_path();

    qemu_sem_wait(&s->postcopy_pause_rp_sem);

    trace_postcopy_pause_return_path_continued();

    return true;
}
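
/*
 * The destination has sent us its received-page bitmap for @block_name;
 * reload our dirty bitmap from it so that pages lost in transit get
 * resent during postcopy recovery.
 */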
static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);

    if (!block) {
        error_report("%s: invalid block name '%s'", __func__, block_name);
        return -EINVAL;
    }

    /* Fetch the received bitmap and refresh the dirty bitmap */
    return ram_dirty_bitmap_reload(s, block);
}
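
/*
 * The destination has acked the postcopy resume handshake; switch back
 * to POSTCOPY_ACTIVE and wake up the thread sending pages.
 */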
static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
{
    trace_source_return_path_thread_resume_ack(value);

    if (value != MIGRATION_RESUME_ACK_VALUE) {
        error_report("%s: illegal resume_ack value %"PRIu32,
                     __func__, value);
        return -1;
    }

    /* Now both sides are active. */
    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    /* Notify the send thread that it's time to continue sending pages */
    qemu_sem_post(&s->rp_state.rp_sem);

    return 0;
}

/*
 * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if it
 * exists) in a safe way.
 */
static void migration_release_dst_files(MigrationState *ms)
{
    QEMUFile *file;

    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
        /*
         * Reset the from_dst_file pointer first before releasing it, as we
         * can't block within the lock section.
         */
        file = ms->rp_state.from_dst_file;
        ms->rp_state.from_dst_file = NULL;
    }

    /*
     * Do the same to the postcopy fast path socket too, if there is one.
     * No locking needed because this qemufile should only be managed by
     * the return path thread.
     */
    if (ms->postcopy_qemufile_src) {
        migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src);
        qemu_file_shutdown(ms->postcopy_qemufile_src);
        qemu_fclose(ms->postcopy_qemufile_src);
        ms->postcopy_qemufile_src = NULL;
    }

    qemu_fclose(file);
}
  2507. /*
  2508. * Handles messages sent on the return path towards the source VM
  2509. *
  2510. */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    rcu_register_thread();

retry:
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            mark_source_rp_bad(ms);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        if ((rp_cmd_args[header_type].len != -1 &&
             header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with"
                         " incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type, header_len,
                         (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP;
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            tmp32 = ldl_be_p(buf);
            trace_source_return_path_thread_pong(tmp32);
            qemu_sem_post(&ms->rp_state.rp_pong_acks);
            break;

        case MIG_RP_MSG_REQ_PAGES:
            start = ldq_be_p(buf);
            len = ldl_be_p(buf + 8);
            migrate_handle_rp_req_pages(ms, NULL, start, len);
            break;

        case MIG_RP_MSG_REQ_PAGES_ID:
            expected_len = 12 + 1; /* header + termination */
            if (header_len >= expected_len) {
                start = ldq_be_p(buf);
                len = ldl_be_p(buf + 8);
                /* Now we expect an idstr */
                tmp32 = buf[12]; /* Length of the following idstr */
                buf[13 + tmp32] = '\0';
                expected_len += tmp32;
            }
            if (header_len != expected_len) {
                error_report("RP: Req_Page_id with length %d expecting %zd",
                             header_len, expected_len);
                mark_source_rp_bad(ms);
                goto out;
            }
            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
            break;

        case MIG_RP_MSG_RECV_BITMAP:
            if (header_len < 1) {
                error_report("%s: missing block name", __func__);
                mark_source_rp_bad(ms);
                goto out;
            }
            /* Format: len (1B) + idstr (<255B). This ends the idstr. */
            buf[buf[0] + 1] = '\0';
            if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
                mark_source_rp_bad(ms);
                goto out;
            }
            break;

        case MIG_RP_MSG_RESUME_ACK:
            tmp32 = ldl_be_p(buf);
            if (migrate_handle_rp_resume_ack(ms, tmp32)) {
                mark_source_rp_bad(ms);
                goto out;
            }
            break;

        default:
            break;
        }
    }

out:
    res = qemu_file_get_error(rp);
    if (res) {
        if (migration_in_postcopy()) {
            /*
             * Maybe there is something we can do: it looks like a
             * network down issue, and we pause for a recovery.
             */
            migration_release_dst_files(ms);
            rp = NULL;
            if (postcopy_pause_return_path_thread(ms)) {
                /*
                 * Reload rp, reset the rest.  Referencing it is safe since
                 * it's reset only by us above, or when migration completes
                 */
                rp = ms->rp_state.from_dst_file;
                ms->rp_state.error = false;
                goto retry;
            }
        }

        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
    migration_release_dst_files(ms);
    rcu_unregister_thread();
    return NULL;
}
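
/*
 * Create the return path channel from the main outgoing channel.  When
 * create_thread is false (postcopy recovery), the paused return path
 * thread is still alive and will be woken via postcopy_pause_rp_sem
 * instead of spawning a new one.  Returns 0 on success, -1 on failure.
 */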
static int open_return_path_on_source(MigrationState *ms,
                                      bool create_thread)
{
    ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
    if (!ms->rp_state.from_dst_file) {
        return -1;
    }

    trace_open_return_path_on_source();

    if (!create_thread) {
        /* We're done */
        return 0;
    }

    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
                       source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
    ms->rp_state.rp_thread_created = true;

    trace_open_return_path_on_source_continue();

    return 0;
}
/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
static int await_return_path_close_on_source(MigrationState *ms)
{
    /*
     * If this is a normal exit then the destination will send a SHUT and the
     * rp_thread will exit, however if there's an error we need to cause
     * it to exit.
     */
    if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
        /*
         * shutdown(2), if we have it, will cause it to unblock if it's stuck
         * waiting for the destination.
         */
        qemu_file_shutdown(ms->rp_state.from_dst_file);
        mark_source_rp_bad(ms);
    }
    trace_await_return_path_close_on_source_joining();
    qemu_thread_join(&ms->rp_state.rp_thread);
    ms->rp_state.rp_thread_created = false;
    trace_await_return_path_close_on_source_close();
    return ms->rp_state.error;
}
static inline void
migration_wait_main_channel(MigrationState *ms)
{
    /* Wait until one PONG message is received */
    qemu_sem_wait(&ms->rp_state.rp_pong_acks);
}
/*
 * Switch from normal iteration to postcopy
 * Returns non-0 on error
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;

    if (migrate_postcopy_preempt()) {
        migration_wait_main_channel(ms);
        if (postcopy_preempt_establish_channel(ms)) {
            migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
            return -1;
        }
    }

    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * In FINISH_MIGRATE state and with the iothread lock held, everything
     * should be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        ram_postcopy_send_discard_bitmap(ms);
    }

    /*
     * Send the rest of the state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each device's load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use an in-memory buffer channel to hold the whole of the
     * device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /*
     * Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Let's just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /*
     * Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    }

    trace_postcopy_preempt_enabled(migrate_postcopy_preempt());

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /*
         * A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
/**
 * migration_maybe_pause: Pause if required to by
 * migrate_pause_before_switchover; called with the iothread locked
 * Returns: 0 on success
 */
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state)
{
    if (!migrate_pause_before_switchover()) {
        return 0;
    }

    /*
     * Since leaving this state is not atomic with posting the semaphore
     * it's possible that someone could have issued multiple migrate_continue
     * and the semaphore is incorrectly positive at this point;
     * the docs say it's undefined to reinit a semaphore that's already
     * init'd, so use timedwait to eat up any existing posts.
     */
    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
        /* This block intentionally left blank */
    }

    /*
     * If the migration is cancelled when it is in the completion phase,
     * the migration state is set to MIGRATION_STATUS_CANCELLING.
     * In that case we don't need to wait on the semaphore; otherwise we
     * would always block on 'pause_sem'.
     */
    if (s->state != MIGRATION_STATUS_CANCELLING) {
        qemu_mutex_unlock_iothread();
        migrate_set_state(&s->state, *current_active_state,
                          MIGRATION_STATUS_PRE_SWITCHOVER);
        qemu_sem_wait(&s->pause_sem);
        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                          new_state);
        *current_active_state = new_state;
        qemu_mutex_lock_iothread();
    }

    return s->state == new_state ? 0 : -EINVAL;
}
/**
 * migration_completion: Used by migration_thread when there's not much left.
 * The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);

        s->vm_was_running = runstate_is_running();
        ret = global_state_store();
        if (!ret) {
            bool inactivate = !migrate_colo_enabled();
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            trace_migration_completion_vm_stop(ret);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         inactivate);
            }
            if (inactivate && ret >= 0) {
                s->block_inactive = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_mutex_lock_iothread();
        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        qemu_mutex_unlock_iothread();

        /*
         * Shutdown the postcopy fast path thread.  This is only needed
         * when dest QEMU binary is old (7.1/7.2).  QEMU 8.0+ doesn't need
         * this.
         */
        if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
            postcopy_preempt_shutdown_file(s);
        }

        trace_migration_completion_postcopy_end_after_complete();
    } else {
        goto fail;
    }

    /*
     * If rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send its status in
     * a SHUT command).
     */
    if (s->rp_state.rp_thread_created) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
        /* COLO does not support postcopy */
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_COLO);
    } else {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /*
     * If not doing postcopy, vm_start() will be called: let's regain
     * control of the images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_DEVICE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}
/**
 * bg_migration_completion: Used by bg_migration_thread after all the
 * RAM has been saved.  The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 */
static void bg_migration_completion(MigrationState *s)
{
    int current_active_state = s->state;

    /*
     * Stop tracking RAM writes - un-protect memory, un-register UFFD
     * memory ranges, flush kernel wait queues and wake up threads
     * waiting for write fault to be resolved.
     */
    ram_write_tracking_stop();

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        /*
         * By this moment we have RAM content saved into the migration stream.
         * The next step is to flush the non-RAM content (device state)
         * right after the ram content.  The device state has been stored into
         * the temporary buffer before RAM saving started.
         */
        qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
        qemu_fflush(s->to_dst_file);
    } else if (s->state == MIGRATION_STATUS_CANCELLING) {
        goto fail;
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail;
    }

    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_COMPLETED);
    return;

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}
bool migrate_colo_enabled(void)
{
    MigrationState *s = migrate_get_current();
    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
}

typedef enum MigThrError {
    /* No error detected */
    MIG_THR_ERR_NONE = 0,
    /* Detected error, but resumed successfully */
    MIG_THR_ERR_RECOVERED = 1,
    /* Detected fatal error, need to exit */
    MIG_THR_ERR_FATAL = 2,
} MigThrError;
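
/*
 * Final resume handshake: send the postcopy-resume command to the
 * destination, then wait until the return path thread receives the
 * RESUME_ACK and flips the state to POSTCOPY_ACTIVE.  Returns 0 on
 * success, -1 if the state ended up anywhere else.
 */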
static int postcopy_resume_handshake(MigrationState *s)
{
    qemu_savevm_send_postcopy_resume(s->to_dst_file);

    while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
        qemu_sem_wait(&s->rp_state.rp_sem);
    }

    if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        return 0;
    }

    return -1;
}
/* Return zero on success, or <0 on error */
static int postcopy_do_resume(MigrationState *s)
{
    int ret;

    /*
     * Call all the resume_prepare() hooks, so that modules can be
     * ready for the migration resume.
     */
    ret = qemu_savevm_state_resume_prepare(s);
    if (ret) {
        error_report("%s: resume_prepare() failure detected: %d",
                     __func__, ret);
        return ret;
    }

    /*
     * If preempt is enabled, re-establish the preempt channel.  Note that
     * we do it after resume prepare to make sure the main channel will be
     * created before the preempt channel.  E.g. with a weak network, the
     * dest QEMU may otherwise see the preempt and main channels connect in
     * the wrong order.  This guarantees the correct order.
     */
    ret = postcopy_preempt_establish_channel(s);
    if (ret) {
        error_report("%s: postcopy_preempt_establish_channel(): %d",
                     __func__, ret);
        return ret;
    }

    /*
     * Last handshake with destination on the resume (destination will
     * switch to postcopy-active afterwards)
     */
    ret = postcopy_resume_handshake(s);
    if (ret) {
        error_report("%s: handshake failed: %d", __func__, ret);
        return ret;
    }

    return 0;
}
/*
 * We don't return until we are in a safe state to continue current
 * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
 * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
 */
static MigThrError postcopy_pause(MigrationState *s)
{
    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

    while (true) {
        QEMUFile *file;

        /*
         * Current channel is possibly broken.  Release it.  Note that this is
         * guaranteed even without lock because to_dst_file should only be
         * modified by the migration thread.  That also guarantees that the
         * unregister of yank is safe too without the lock.  It should be safe
         * even to be within the qemu_file_lock, but we didn't do that to avoid
         * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
         * the qemu_file_lock critical section as small as possible.
         */
        assert(s->to_dst_file);
        migration_ioc_unregister_yank_from_file(s->to_dst_file);
        qemu_mutex_lock(&s->qemu_file_lock);
        file = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);

        qemu_file_shutdown(file);
        qemu_fclose(file);

        migrate_set_state(&s->state, s->state,
                          MIGRATION_STATUS_POSTCOPY_PAUSED);

        error_report("Detected IO failure for postcopy. "
                     "Migration paused.");

        /*
         * We wait until things are fixed up.  Then someone will set the
         * status back for us.
         */
        while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
            qemu_sem_wait(&s->postcopy_pause_sem);
        }

        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
            /* Woken up by a recover procedure. Give it a shot */

            /*
             * Firstly, let's wake up the return path now, with a new
             * return path channel.
             */
            qemu_sem_post(&s->postcopy_pause_rp_sem);

            /* Do the resume logic */
            if (postcopy_do_resume(s) == 0) {
                /* Let's continue! */
                trace_postcopy_pause_continued();
                return MIG_THR_ERR_RECOVERED;
            } else {
                /*
                 * Something went wrong during the recovery, let's
                 * pause again.  Pause is always better than throwing
                 * data away.
                 */
                continue;
            }
        } else {
            /* This is not right... Time to quit. */
            return MIG_THR_ERR_FATAL;
        }
    }
}
static MigThrError migration_detect_error(MigrationState *s)
{
    int ret;
    int state = s->state;
    Error *local_error = NULL;

    if (state == MIGRATION_STATUS_CANCELLING ||
        state == MIGRATION_STATUS_CANCELLED) {
        /* End the migration, but don't set the state to failed */
        return MIG_THR_ERR_FATAL;
    }

    /*
     * Try to detect any file errors.  Note that postcopy_qemufile_src will
     * be NULL when postcopy preempt is not enabled.
     */
    ret = qemu_file_get_error_obj_any(s->to_dst_file,
                                      s->postcopy_qemufile_src,
                                      &local_error);
    if (!ret) {
        /* Everything is fine */
        assert(!local_error);
        return MIG_THR_ERR_NONE;
    }

    if (local_error) {
        migrate_set_error(s, local_error);
        error_free(local_error);
    }

    if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /*
         * For postcopy, we allow the network to be down for a
         * while.  After that, it can be continued by a
         * recovery phase.
         */
        return postcopy_pause(s);
    } else {
        /*
         * For precopy (or postcopy with an error outside IO), we fail
         * immediately.
         */
        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
        trace_migration_thread_file_err();

        /* Time to stop the migration, now. */
        return MIG_THR_ERR_FATAL;
    }
}
/* How many bytes have we transferred since the beginning of the migration */
static uint64_t migration_total_bytes(MigrationState *s)
{
    return qemu_file_total_transferred(s->to_dst_file) +
        ram_counters.multifd_bytes;
}
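
/*
 * Fill in the final statistics once the migration has finished: total
 * time, downtime (for precopy only; postcopy computes it during
 * postcopy_start()) and the average throughput in mbps.
 */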
static void migration_calculate_complete(MigrationState *s)
{
    uint64_t bytes = migration_total_bytes(s);
    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t transfer_time;

    s->total_time = end_time - s->start_time;
    if (!s->downtime) {
        /*
         * It's still not set, so we are precopy migration.  For
         * postcopy, downtime is calculated during postcopy_start().
         */
        s->downtime = end_time - s->downtime_start;
    }

    transfer_time = s->total_time - s->setup_time;
    if (transfer_time) {
        s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
    }
}
static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields at the same time to avoid mismatched data
     * leading to a wrong speed calculation.
     */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->iteration_initial_bytes = migration_total_bytes(s);
    s->iteration_initial_pages = ram_get_total_transferred_pages();
}
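
/*
 * Recompute bandwidth, mbps, pages-per-second and the switchover
 * threshold_size (bandwidth x downtime_limit) once per BUFFER_DELAY
 * window, then reset the rate limit counter and start a fresh window.
 */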
static void migration_update_counters(MigrationState *s,
                                      int64_t current_time)
{
    uint64_t transferred, transferred_pages, time_spent;
    uint64_t current_bytes; /* bytes transferred since the beginning */
    double bandwidth;

    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
        return;
    }

    current_bytes = migration_total_bytes(s);
    transferred = current_bytes - s->iteration_initial_bytes;
    time_spent = current_time - s->iteration_start_time;
    bandwidth = (double)transferred / time_spent;
    s->threshold_size = bandwidth * s->parameters.downtime_limit;

    s->mbps = (((double) transferred * 8.0) /
               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;

    transferred_pages = ram_get_total_transferred_pages() -
                            s->iteration_initial_pages;
    s->pages_per_second = (double) transferred_pages /
                             (((double) time_spent / 1000.0));

    /*
     * If we haven't sent anything, we don't want to
     * recalculate.  10000 is a small enough number for our purposes.
     */
    if (ram_counters.dirty_pages_rate && transferred > 10000) {
        s->expected_downtime = ram_counters.remaining / bandwidth;
    }

    qemu_file_reset_rate_limit(s->to_dst_file);

    update_iteration_initial_status(s);

    trace_migrate_transferred(transferred, time_spent,
                              bandwidth, s->threshold_size);
}
/* Migration thread iteration status */
typedef enum {
    MIG_ITERATE_RESUME,         /* Resume current iteration */
    MIG_ITERATE_SKIP,           /* Skip current iteration */
    MIG_ITERATE_BREAK,          /* Break the loop */
} MigIterateState;

/*
 * Run one iteration of migration; returns a MigIterateState telling the
 * caller whether to resume, skip, or break out of the loop.
 */
static MigIterateState migration_iteration_run(MigrationState *s)
{
    uint64_t must_precopy, can_postcopy;
    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;

    qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy);
    uint64_t pending_size = must_precopy + can_postcopy;

    trace_migrate_pending_estimate(pending_size, must_precopy, can_postcopy);

    if (must_precopy <= s->threshold_size) {
        qemu_savevm_state_pending_exact(&must_precopy, &can_postcopy);
        pending_size = must_precopy + can_postcopy;
        trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy);
    }

    if (!pending_size || pending_size < s->threshold_size) {
        trace_migration_thread_low_pending(pending_size);
        migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    /* Still a significant amount to transfer */
    if (!in_postcopy && must_precopy <= s->threshold_size &&
        qatomic_read(&s->start_postcopy)) {
        if (postcopy_start(s)) {
            error_report("%s: postcopy failed to start", __func__);
        }
        return MIG_ITERATE_SKIP;
    }

    /* Just another iteration step */
    qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
    return MIG_ITERATE_RESUME;
}
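
/*
 * Tear down after the migration loop exits: stop auto-converge
 * throttling, compute final stats or restart the VM depending on the
 * end state, and schedule migrate_fd_cleanup() from the main loop.
 */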
static void migration_iteration_finish(MigrationState *s)
{
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
        break;
    case MIGRATION_STATUS_COLO:
        if (!migrate_colo_enabled()) {
            error_report("%s: critical error: calling COLO code without "
                         "COLO enabled", __func__);
        }
        migrate_start_colo_process(s);
        s->vm_was_running = true;
        /* Fallthrough */
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        if (s->vm_was_running) {
            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                vm_start();
            }
        } else {
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
        break;
    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }
    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
static void bg_migration_iteration_finish(MigrationState *s)
{
    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        break;
    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }

    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
/*
 * Run one iteration of the background snapshot; returns MIG_ITERATE_BREAK
 * once everything has been saved, MIG_ITERATE_RESUME otherwise.
 */
static MigIterateState bg_migration_iteration_run(MigrationState *s)
{
    int res;

    res = qemu_savevm_state_iterate(s->to_dst_file, false);
    if (res > 0) {
        bg_migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}
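
/*
 * Urgent requests bump rate_limit_sem so that migration_rate_limit()
 * below wakes up early instead of sleeping out the full BUFFER_DELAY
 * window; each post must be matched by a consume.
 */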
void migration_make_urgent_request(void)
{
    qemu_sem_post(&migrate_get_current()->rate_limit_sem);
}

void migration_consume_urgent_request(void)
{
    qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
}
/* Returns true if the rate limiting was broken by an urgent request */
bool migration_rate_limit(void)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    MigrationState *s = migrate_get_current();

    bool urgent = false;
    migration_update_counters(s, now);
    if (qemu_file_rate_limit(s->to_dst_file)) {

        if (qemu_file_get_error(s->to_dst_file)) {
            return false;
        }
        /*
         * Wait for a delay to do rate limiting OR
         * something urgent to post the semaphore.
         */
        int ms = s->iteration_start_time + BUFFER_DELAY - now;
        trace_migration_rate_limit_pre(ms);
        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
            /*
             * We were woken by one or more urgent things but
             * the timedwait will have consumed one of them.
             * The service routine for the urgent wake will dec
             * the semaphore itself for each item it consumes,
             * so add back the one we just ate.
             */
            qemu_sem_post(&s->rate_limit_sem);
            urgent = true;
        }
        trace_migration_rate_limit_post(urgent);
    }
    return urgent;
}
/*
 * If failover devices are present, wait until they are completely
 * unplugged
 */
static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                    int new_state)
{
    if (qemu_savevm_state_guest_unplug_pending()) {
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);

        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
               qemu_savevm_state_guest_unplug_pending()) {
            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
        }
        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
            int timeout = 120; /* 120 x 250ms = 30 seconds */
            /*
             * The migration has been cancelled, but as we have started an
             * unplug we must wait for it to finish to be able to plug
             * the card back in.
             */
            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
            }
            if (qemu_savevm_state_guest_unplug_pending() &&
                !qtest_enabled()) {
                warn_report("migration: partially unplugged device on "
                            "failure");
            }
        }

        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
    } else {
        migrate_set_state(&s->state, old_state, new_state);
    }
}
/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    MigrationThread *thread = NULL;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    MigThrError thr_error;
    bool urgent = false;

    thread = MigrationThreadAdd("live_migration", qemu_get_thread_id());

    rcu_register_thread();

    object_ref(OBJECT(s));
    update_iteration_initial_status(s);

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure dst has it
     * opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo_enabled()) {
        /* Notify migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from an error (e.g. a network failure);
             * reset all the local variables.  This is important to avoid
             * breaking the transferred_bytes and bandwidth calculation.
             */
            update_iteration_initial_status(s);
        }

        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    MigrationThreadDel(thread);
    return NULL;
}
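
/*
 * Bottom half scheduled by bg_migration_thread: restart the vCPUs from
 * the main loop and record how long they were stopped as the snapshot
 * downtime.
 */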
static void bg_migration_vm_start_bh(void *opaque)
{
    MigrationState *s = opaque;

    qemu_bh_delete(s->vm_start_bh);
    s->vm_start_bh = NULL;

    vm_start();
    s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
}
/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of live migration mechanism
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel.  Compared to existing dirty page logging
 * migration, much less stream traffic is produced, resulting in smaller
 * snapshot images, simply because no duplicate pages can get into the
 * stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, whereas
 * with the dirty page logging mechanism the saved snapshot is effectively
 * the state of the VM at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;

    rcu_register_thread();
    object_ref(OBJECT(s));

    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save vmstate for the moment when migration has been
     * initiated but also we want to save RAM content while VM is running.
     * The RAM content should appear first in the vmstate.  So, we first
     * stash the non-RAM part of the vmstate to the temporary buffer,
     * then write RAM part of the vmstate to the migration stream
     * with vCPUs running and, finally, write stashed non-RAM part of
     * the vmstate from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
    object_unref(OBJECT(s->bioc));

    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    qemu_savevm_state_header(s->to_dst_file);
    qemu_savevm_state_setup(s->to_dst_file);

    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                            MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();

    /*
     * If VM is currently in suspended state, then, to make a valid runstate
     * transition in vm_stop_force_state() we need to wake it up first.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    s->vm_was_running = runstate_is_running();

    if (global_state_store()) {
        goto fail;
    }
    /* Forcibly stop VM before saving state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
    }
    /*
     * Put vCPUs in sync with shadow context structures, then
     * save their state to channel-buffer along with devices.
     */
    cpu_synchronize_all_states();
    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;

    /*
     * Start VM from BH handler to avoid write-fault lock here.
     * UFFD-WP protection for the whole RAM is already enabled so
     * calling VM state change notifiers from vm_start() would initiate
     * writes to virtio VQs memory which is in write-protected region.
     */
    s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
    qemu_bh_schedule(s->vm_start_bh);

    qemu_mutex_unlock_iothread();

    while (migration_is_active(s)) {
        MigIterateState iter_state = bg_migration_iteration_run(s);
        if (iter_state == MIG_ITERATE_SKIP) {
            continue;
        } else if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_FAILED);
        qemu_mutex_unlock_iothread();
    }

    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}
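
/*
 * Entry point once the outgoing channel is connected (or a postcopy
 * recovery channel is re-connected): set up rate limiting and the
 * return path, then either wake the paused threads (resume) or spawn
 * the live_migration / bg_snapshot thread (fresh migration).
 */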
void migrate_fd_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    int64_t rate_limit;
    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile if migration completes successfully, there won't be an error
     * dumped when calling migrate_fd_cleanup().
     */
    migrate_error_free(s);

    s->expected_downtime = s->parameters.downtime_limit;
    if (resume) {
        assert(s->cleanup_bh);
    } else {
        assert(!s->cleanup_bh);
        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
    }
    if (error_in) {
        migrate_fd_error(s, error_in);
        if (resume) {
            /*
             * Don't do cleanup for resume if channel is invalid, but only dump
             * the error.  We wait for another channel connect from the user.
             * The error_report still gives HMP user a hint on what failed.
             * It's normally done in migrate_fd_cleanup(), but call it here
             * explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migrate_fd_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = s->parameters.max_postcopy_bandwidth /
            XFER_LIMIT_RATIO;
    } else {
        /* This is a fresh new migration */
        rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;

        /* Notify before starting migration thread */
        notifier_list_notify(&migration_state_notifiers, s);
    }

    qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
    qemu_file_set_blocking(s->to_dst_file, true);

    /*
     * Open the return path.  For postcopy, it is used exclusively.  For
     * precopy, QEMU uses the return path only if the user enabled the
     * "return-path" capability.
     */
    if (migrate_postcopy_ram() || migrate_use_return_path()) {
        if (open_return_path_on_source(s, !resume)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    /*
     * This needs to be done before resuming a postcopy.  Note: for newer
     * QEMUs we will delay the channel creation until postcopy_start(), to
     * avoid disorder of channel creations.
     */
    if (migrate_postcopy_preempt() && s->preempt_pre_7_2) {
        postcopy_preempt_setup(s);
    }

    if (resume) {
        /* Wakeup the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }

    if (multifd_save_setup(&local_err) != 0) {
        error_report_err(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "bg_snapshot",
                           bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, "live_migration",
                           migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
}
void migration_global_dump(Monitor *mon)
{
    MigrationState *ms = migrate_get_current();

    monitor_printf(mon, "globals:\n");
    monitor_printf(mon, "store-global-state: %s\n",
                   ms->store_global_state ? "on" : "off");
    monitor_printf(mon, "only-migratable: %s\n",
                   only_migratable ? "on" : "off");
    monitor_printf(mon, "send-configuration: %s\n",
                   ms->send_configuration ? "on" : "off");
    monitor_printf(mon, "send-section-footer: %s\n",
                   ms->send_section_footer ? "on" : "off");
    monitor_printf(mon, "decompress-error-check: %s\n",
                   ms->decompress_error_check ? "on" : "off");
    monitor_printf(mon, "clear-bitmap-shift: %u\n",
                   ms->clear_bitmap_shift);
}
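
/* Expose one migration capability as a boolean qdev property (default off) */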
#define DEFINE_PROP_MIG_CAP(name, x) \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)

static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),
    DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
                     decompress_error_check, true),
    DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                      clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
    DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
                     preempt_pre_7_2, false),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
                     parameters.compress_wait_thread, true),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                      parameters.throttle_trigger_threshold,
                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
                     parameters.cpu_throttle_tailslow, false),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                     parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                       parameters.downtime_limit,
                       DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                       parameters.x_checkpoint_delay,
                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_UINT8("multifd-channels", MigrationState,
                      parameters.multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
                      parameters.multifd_compression,
                      DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
    DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
                      parameters.multifd_zlib_level,
                      DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
    DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
                      parameters.multifd_zstd_level,
                      DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
    DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                     parameters.xbzrle_cache_size,
                     DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
    DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
                     parameters.max_postcopy_bandwidth,
                     DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
    DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
                      parameters.max_cpu_throttle,
                      DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
    DEFINE_PROP_SIZE("announce-initial", MigrationState,
                     parameters.announce_initial,
                     DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
    DEFINE_PROP_SIZE("announce-max", MigrationState,
                     parameters.announce_max,
                     DEFAULT_MIGRATE_ANNOUNCE_MAX),
    DEFINE_PROP_SIZE("announce-rounds", MigrationState,
                     parameters.announce_rounds,
                     DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
    DEFINE_PROP_SIZE("announce-step", MigrationState,
                     parameters.announce_step,
                     DEFAULT_MIGRATE_ANNOUNCE_STEP),
    DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
    DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname),
    DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),

    /* Migration capabilities */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-postcopy-preempt",
                        MIGRATION_CAPABILITY_POSTCOPY_PREEMPT),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
    DEFINE_PROP_MIG_CAP("x-background-snapshot",
                        MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
#ifdef CONFIG_LINUX
    DEFINE_PROP_MIG_CAP("x-zero-copy-send",
                        MIGRATION_CAPABILITY_ZERO_COPY_SEND),
#endif

    DEFINE_PROP_END_OF_LIST(),
};
static void migration_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    device_class_set_props(dc, migration_properties);
}

static void migration_instance_finalize(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);

    qemu_mutex_destroy(&ms->error_mutex);
    qemu_mutex_destroy(&ms->qemu_file_lock);
    qemu_sem_destroy(&ms->wait_unplug_sem);
    qemu_sem_destroy(&ms->rate_limit_sem);
    qemu_sem_destroy(&ms->pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_sem);
    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_sem);
    qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
    error_free(ms->error);
}
static void migration_instance_init(Object *obj)
{
    MigrationState *ms = MIGRATION_OBJ(obj);
    MigrationParameters *params = &ms->parameters;

    ms->state = MIGRATION_STATUS_NONE;
    ms->mbps = -1;
    ms->pages_per_second = -1;
    qemu_sem_init(&ms->pause_sem, 0);
    qemu_mutex_init(&ms->error_mutex);

    params->tls_hostname = g_strdup("");
    params->tls_creds = g_strdup("");

    /* Set has_* up only for parameter checks */
    params->has_compress_level = true;
    params->has_compress_threads = true;
    params->has_compress_wait_thread = true;
    params->has_decompress_threads = true;
    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
    params->has_cpu_throttle_increment = true;
    params->has_cpu_throttle_tailslow = true;
    params->has_max_bandwidth = true;
    params->has_downtime_limit = true;
    params->has_x_checkpoint_delay = true;
    params->has_block_incremental = true;
    params->has_multifd_channels = true;
    params->has_multifd_compression = true;
    params->has_multifd_zlib_level = true;
    params->has_multifd_zstd_level = true;
    params->has_xbzrle_cache_size = true;
    params->has_max_postcopy_bandwidth = true;
    params->has_max_cpu_throttle = true;
    params->has_announce_initial = true;
    params->has_announce_max = true;
    params->has_announce_rounds = true;
    params->has_announce_step = true;

    qemu_sem_init(&ms->postcopy_pause_sem, 0);
    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_sem, 0);
    qemu_sem_init(&ms->rp_state.rp_pong_acks, 0);
    qemu_sem_init(&ms->rate_limit_sem, 0);
    qemu_sem_init(&ms->wait_unplug_sem, 0);
    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
    qemu_mutex_init(&ms->qemu_file_lock);
}
/*
 * Return true if the check passes, false otherwise.  The error is put
 * into errp if provided.
 */
static bool migration_object_check(MigrationState *ms, Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    /* Assuming all off */
    bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
    int i;

    if (!migrate_params_check(&ms->parameters, errp)) {
        return false;
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (ms->enabled_capabilities[i]) {
            QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
        }
    }

    ret = migrate_caps_check(cap_list, head, errp);

    /* It works with head == NULL */
    qapi_free_MigrationCapabilityStatusList(head);

    return ret;
}
static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};

static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);