savevm.c 103 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2009-2015 Red Hat Inc
  6. *
  7. * Authors:
  8. * Juan Quintela <quintela@redhat.com>
  9. *
  10. * Permission is hereby granted, free of charge, to any person obtaining a copy
  11. * of this software and associated documentation files (the "Software"), to deal
  12. * in the Software without restriction, including without limitation the rights
  13. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14. * copies of the Software, and to permit persons to whom the Software is
  15. * furnished to do so, subject to the following conditions:
  16. *
  17. * The above copyright notice and this permission notice shall be included in
  18. * all copies or substantial portions of the Software.
  19. *
  20. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26. * THE SOFTWARE.
  27. */
  28. #include "qemu/osdep.h"
  29. #include "hw/boards.h"
  30. #include "net/net.h"
  31. #include "migration.h"
  32. #include "migration/snapshot.h"
  33. #include "migration-stats.h"
  34. #include "migration/vmstate.h"
  35. #include "migration/misc.h"
  36. #include "migration/register.h"
  37. #include "migration/global_state.h"
  38. #include "migration/channel-block.h"
  39. #include "ram.h"
  40. #include "qemu-file.h"
  41. #include "savevm.h"
  42. #include "postcopy-ram.h"
  43. #include "qapi/error.h"
  44. #include "qapi/qapi-commands-migration.h"
  45. #include "qapi/clone-visitor.h"
  46. #include "qapi/qapi-builtin-visit.h"
  47. #include "qapi/qmp/qerror.h"
  48. #include "qemu/error-report.h"
  49. #include "sysemu/cpus.h"
  50. #include "exec/memory.h"
  51. #include "exec/target_page.h"
  52. #include "trace.h"
  53. #include "qemu/iov.h"
  54. #include "qemu/job.h"
  55. #include "qemu/main-loop.h"
  56. #include "block/snapshot.h"
  57. #include "qemu/cutils.h"
  58. #include "io/channel-buffer.h"
  59. #include "io/channel-file.h"
  60. #include "sysemu/replay.h"
  61. #include "sysemu/runstate.h"
  62. #include "sysemu/sysemu.h"
  63. #include "sysemu/xen.h"
  64. #include "migration/colo.h"
  65. #include "qemu/bitmap.h"
  66. #include "net/announce.h"
  67. #include "qemu/yank.h"
  68. #include "yank_functions.h"
  69. #include "sysemu/qtest.h"
  70. #include "options.h"
/*
 * Version number of the POSTCOPY_RAM_DISCARD wire format; a tentative
 * definition, so it is zero-initialized.  Bump when the discard command
 * layout changes.
 */
const unsigned int postcopy_ram_discard_version;

/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
    MIG_CMD_INVALID = 0,          /* Must be 0 */
    MIG_CMD_OPEN_RETURN_PATH,     /* Tell the dest to open the Return path */
    MIG_CMD_PING,                 /* Request a PONG on the RP */

    MIG_CMD_POSTCOPY_ADVISE,      /* Prior to any page transfers, just
                                     warn we might want to do PC */
    MIG_CMD_POSTCOPY_LISTEN,      /* Start listening for incoming
                                     pages as it's running. */
    MIG_CMD_POSTCOPY_RUN,         /* Start execution */

    MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
                                     were previously sent during
                                     precopy but are dirty. */
    MIG_CMD_PACKAGED,             /* Send a wrapped stream within this stream */
    MIG_CMD_ENABLE_COLO,          /* Enable COLO */
    MIG_CMD_POSTCOPY_RESUME,      /* resume postcopy on dest */
    MIG_CMD_RECV_BITMAP,          /* Request for recved bitmap on dst */
    MIG_CMD_MAX
};

/* Upper bound on the payload carried by a single MIG_CMD_PACKAGED */
#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
  92. static struct mig_cmd_args {
  93. ssize_t len; /* -1 = variable */
  94. const char *name;
  95. } mig_cmd_args[] = {
  96. [MIG_CMD_INVALID] = { .len = -1, .name = "INVALID" },
  97. [MIG_CMD_OPEN_RETURN_PATH] = { .len = 0, .name = "OPEN_RETURN_PATH" },
  98. [MIG_CMD_PING] = { .len = sizeof(uint32_t), .name = "PING" },
  99. [MIG_CMD_POSTCOPY_ADVISE] = { .len = -1, .name = "POSTCOPY_ADVISE" },
  100. [MIG_CMD_POSTCOPY_LISTEN] = { .len = 0, .name = "POSTCOPY_LISTEN" },
  101. [MIG_CMD_POSTCOPY_RUN] = { .len = 0, .name = "POSTCOPY_RUN" },
  102. [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
  103. .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
  104. [MIG_CMD_POSTCOPY_RESUME] = { .len = 0, .name = "POSTCOPY_RESUME" },
  105. [MIG_CMD_PACKAGED] = { .len = 4, .name = "PACKAGED" },
  106. [MIG_CMD_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
  107. [MIG_CMD_MAX] = { .len = -1, .name = "MAX" },
  108. };
  109. /* Note for MIG_CMD_POSTCOPY_ADVISE:
  110. * The format of arguments is depending on postcopy mode:
  111. * - postcopy RAM only
  112. * uint64_t host page size
  113. * uint64_t target page size
  114. *
  115. * - postcopy RAM and postcopy dirty bitmaps
  116. * format is the same as for postcopy RAM only
  117. *
  118. * - postcopy dirty bitmaps only
  119. * Nothing. Command length field is 0.
  120. *
  121. * Be careful: adding a new postcopy entity with some other parameters should
  122. * not break format self-description ability. Good way is to introduce some
  123. * generic extendable format with an exception for two old entities.
  124. */
  125. /***********************************************************/
  126. /* savevm/loadvm support */
  127. static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
  128. {
  129. if (is_writable) {
  130. return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
  131. } else {
  132. return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
  133. }
  134. }
  135. /* QEMUFile timer support.
  136. * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
  137. */
  138. void timer_put(QEMUFile *f, QEMUTimer *ts)
  139. {
  140. uint64_t expire_time;
  141. expire_time = timer_expire_time_ns(ts);
  142. qemu_put_be64(f, expire_time);
  143. }
  144. void timer_get(QEMUFile *f, QEMUTimer *ts)
  145. {
  146. uint64_t expire_time;
  147. expire_time = qemu_get_be64(f);
  148. if (expire_time != -1) {
  149. timer_mod_ns(ts, expire_time);
  150. } else {
  151. timer_del(ts);
  152. }
  153. }
  154. /* VMState timer support.
  155. * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
  156. */
  157. static int get_timer(QEMUFile *f, void *pv, size_t size,
  158. const VMStateField *field)
  159. {
  160. QEMUTimer *v = pv;
  161. timer_get(f, v);
  162. return 0;
  163. }
  164. static int put_timer(QEMUFile *f, void *pv, size_t size,
  165. const VMStateField *field, JSONWriter *vmdesc)
  166. {
  167. QEMUTimer *v = pv;
  168. timer_put(f, v);
  169. return 0;
  170. }
  171. const VMStateInfo vmstate_info_timer = {
  172. .name = "timer",
  173. .get = get_timer,
  174. .put = put_timer,
  175. };
/* Legacy (compat) identity of a section: old-style idstr + instance. */
typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;

/* One registered savevm section (device/subsystem state handler). */
typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    char idstr[256];
    uint32_t instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    const SaveVMHandlers *ops;      /* callback-style handlers (e.g. RAM) */
    const VMStateDescription *vmsd; /* declarative VMState, if any */
    void *opaque;
    CompatEntry *compat;            /* non-NULL when a compat id is in use */
    int is_ram;
} SaveStateEntry;

/* Global registry of sections plus the migrated "configuration" fields. */
typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    /* head of each priority band inside the ordered handlers list */
    SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
    int global_section_id;
    /* fields below travel in the "configuration" section of the stream */
    uint32_t len;                     /* byte length of name (no NUL) */
    const char *name;                 /* machine type name */
    uint32_t target_page_bits;
    uint32_t caps_count;
    MigrationCapability *capabilities; /* validated capabilities, caps_count */
    QemuUUID uuid;
} SaveState;

static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
    .global_section_id = 0,
};

static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);
  214. static bool should_validate_capability(int capability)
  215. {
  216. assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
  217. /* Validate only new capabilities to keep compatibility. */
  218. switch (capability) {
  219. case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
  220. return true;
  221. default:
  222. return false;
  223. }
  224. }
  225. static uint32_t get_validatable_capabilities_count(void)
  226. {
  227. MigrationState *s = migrate_get_current();
  228. uint32_t result = 0;
  229. int i;
  230. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  231. if (should_validate_capability(i) && s->capabilities[i]) {
  232. result++;
  233. }
  234. }
  235. return result;
  236. }
  237. static int configuration_pre_save(void *opaque)
  238. {
  239. SaveState *state = opaque;
  240. const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
  241. MigrationState *s = migrate_get_current();
  242. int i, j;
  243. state->len = strlen(current_name);
  244. state->name = current_name;
  245. state->target_page_bits = qemu_target_page_bits();
  246. state->caps_count = get_validatable_capabilities_count();
  247. state->capabilities = g_renew(MigrationCapability, state->capabilities,
  248. state->caps_count);
  249. for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  250. if (should_validate_capability(i) && s->capabilities[i]) {
  251. state->capabilities[j++] = i;
  252. }
  253. }
  254. state->uuid = qemu_uuid;
  255. return 0;
  256. }
  257. static int configuration_post_save(void *opaque)
  258. {
  259. SaveState *state = opaque;
  260. g_free(state->capabilities);
  261. state->capabilities = NULL;
  262. state->caps_count = 0;
  263. return 0;
  264. }
  265. static int configuration_pre_load(void *opaque)
  266. {
  267. SaveState *state = opaque;
  268. /* If there is no target-page-bits subsection it means the source
  269. * predates the variable-target-page-bits support and is using the
  270. * minimum possible value for this CPU.
  271. */
  272. state->target_page_bits = qemu_target_page_bits_min();
  273. return 0;
  274. }
  275. static bool configuration_validate_capabilities(SaveState *state)
  276. {
  277. bool ret = true;
  278. MigrationState *s = migrate_get_current();
  279. unsigned long *source_caps_bm;
  280. int i;
  281. source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
  282. for (i = 0; i < state->caps_count; i++) {
  283. MigrationCapability capability = state->capabilities[i];
  284. set_bit(capability, source_caps_bm);
  285. }
  286. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  287. bool source_state, target_state;
  288. if (!should_validate_capability(i)) {
  289. continue;
  290. }
  291. source_state = test_bit(i, source_caps_bm);
  292. target_state = s->capabilities[i];
  293. if (source_state != target_state) {
  294. error_report("Capability %s is %s, but received capability is %s",
  295. MigrationCapability_str(i),
  296. target_state ? "on" : "off",
  297. source_state ? "on" : "off");
  298. ret = false;
  299. /* Don't break here to report all failed capabilities */
  300. }
  301. }
  302. g_free(source_caps_bm);
  303. return ret;
  304. }
  305. static int configuration_post_load(void *opaque, int version_id)
  306. {
  307. SaveState *state = opaque;
  308. const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
  309. int ret = 0;
  310. if (strncmp(state->name, current_name, state->len) != 0) {
  311. error_report("Machine type received is '%.*s' and local is '%s'",
  312. (int) state->len, state->name, current_name);
  313. ret = -EINVAL;
  314. goto out;
  315. }
  316. if (state->target_page_bits != qemu_target_page_bits()) {
  317. error_report("Received TARGET_PAGE_BITS is %d but local is %d",
  318. state->target_page_bits, qemu_target_page_bits());
  319. ret = -EINVAL;
  320. goto out;
  321. }
  322. if (!configuration_validate_capabilities(state)) {
  323. ret = -EINVAL;
  324. goto out;
  325. }
  326. out:
  327. g_free((void *)state->name);
  328. state->name = NULL;
  329. state->len = 0;
  330. g_free(state->capabilities);
  331. state->capabilities = NULL;
  332. state->caps_count = 0;
  333. return ret;
  334. }
  335. static int get_capability(QEMUFile *f, void *pv, size_t size,
  336. const VMStateField *field)
  337. {
  338. MigrationCapability *capability = pv;
  339. char capability_str[UINT8_MAX + 1];
  340. uint8_t len;
  341. int i;
  342. len = qemu_get_byte(f);
  343. qemu_get_buffer(f, (uint8_t *)capability_str, len);
  344. capability_str[len] = '\0';
  345. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  346. if (!strcmp(MigrationCapability_str(i), capability_str)) {
  347. *capability = i;
  348. return 0;
  349. }
  350. }
  351. error_report("Received unknown capability %s", capability_str);
  352. return -EINVAL;
  353. }
  354. static int put_capability(QEMUFile *f, void *pv, size_t size,
  355. const VMStateField *field, JSONWriter *vmdesc)
  356. {
  357. MigrationCapability *capability = pv;
  358. const char *capability_str = MigrationCapability_str(*capability);
  359. size_t len = strlen(capability_str);
  360. assert(len <= UINT8_MAX);
  361. qemu_put_byte(f, len);
  362. qemu_put_buffer(f, (uint8_t *)capability_str, len);
  363. return 0;
  364. }
/* VMState glue for streaming MigrationCapability values by name. */
static const VMStateInfo vmstate_info_capability = {
    .name = "capability",
    .get = get_capability,
    .put = put_capability,
};

/* The target-page-bits subsection is present only if the
 * target page size is not the same as the default (ie the
 * minimum page size for a variable-page-size guest CPU).
 * If it is present then it contains the actual target page
 * bits for the machine, and migration will fail if the
 * two ends don't agree about it.
 */
  377. static bool vmstate_target_page_bits_needed(void *opaque)
  378. {
  379. return qemu_target_page_bits()
  380. > qemu_target_page_bits_min();
  381. }
/* Optional subsection carrying the non-default target page size. */
static const VMStateDescription vmstate_target_page_bits = {
    .name = "configuration/target-page-bits",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_target_page_bits_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(target_page_bits, SaveState),
        VMSTATE_END_OF_LIST()
    }
};
  392. static bool vmstate_capabilites_needed(void *opaque)
  393. {
  394. return get_validatable_capabilities_count() > 0;
  395. }
/* Optional subsection listing the source's enabled validatable capabilities.
 * (Name keeps its historical misspelling for wire compatibility.) */
static const VMStateDescription vmstate_capabilites = {
    .name = "configuration/capabilities",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_capabilites_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_V(caps_count, SaveState, 1),
        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                    vmstate_info_capability,
                                    MigrationCapability),
        VMSTATE_END_OF_LIST()
    }
};
  409. static bool vmstate_uuid_needed(void *opaque)
  410. {
  411. return qemu_uuid_set && migrate_validate_uuid();
  412. }
  413. static int vmstate_uuid_post_load(void *opaque, int version_id)
  414. {
  415. SaveState *state = opaque;
  416. char uuid_src[UUID_STR_LEN];
  417. char uuid_dst[UUID_STR_LEN];
  418. if (!qemu_uuid_set) {
  419. /*
  420. * It's warning because user might not know UUID in some cases,
  421. * e.g. load an old snapshot
  422. */
  423. qemu_uuid_unparse(&state->uuid, uuid_src);
  424. warn_report("UUID is received %s, but local uuid isn't set",
  425. uuid_src);
  426. return 0;
  427. }
  428. if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
  429. qemu_uuid_unparse(&state->uuid, uuid_src);
  430. qemu_uuid_unparse(&qemu_uuid, uuid_dst);
  431. error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
  432. return -EINVAL;
  433. }
  434. return 0;
  435. }
/* Optional subsection carrying the source VM's UUID for validation. */
static const VMStateDescription vmstate_uuid = {
    .name = "configuration/uuid",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_uuid_needed,
    .post_load = vmstate_uuid_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * The leading "configuration" section of the stream: machine type name
 * (length-prefixed, no NUL) plus optional subsections for page bits,
 * capabilities and UUID.  Mismatches fail the load in post_load.
 */
static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .pre_load = configuration_pre_load,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .post_save = configuration_post_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_target_page_bits,
        &vmstate_capabilites,
        &vmstate_uuid,
        NULL
    }
};
  466. static void dump_vmstate_vmsd(FILE *out_file,
  467. const VMStateDescription *vmsd, int indent,
  468. bool is_subsection);
  469. static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
  470. int indent)
  471. {
  472. fprintf(out_file, "%*s{\n", indent, "");
  473. indent += 2;
  474. fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
  475. fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
  476. field->version_id);
  477. fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
  478. field->field_exists ? "true" : "false");
  479. if (field->flags & VMS_ARRAY) {
  480. fprintf(out_file, "%*s\"num\": %d,\n", indent, "", field->num);
  481. }
  482. fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
  483. if (field->vmsd != NULL) {
  484. fprintf(out_file, ",\n");
  485. dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
  486. }
  487. fprintf(out_file, "\n%*s}", indent - 2, "");
  488. }
  489. static void dump_vmstate_vmss(FILE *out_file,
  490. const VMStateDescription **subsection,
  491. int indent)
  492. {
  493. if (*subsection != NULL) {
  494. dump_vmstate_vmsd(out_file, *subsection, indent, true);
  495. }
  496. }
/*
 * Emit one VMStateDescription as a JSON object on out_file.
 *
 * When is_subsection is true the object is anonymous (an element of a
 * "Subsections" array); otherwise it becomes a named "Description"
 * member.  Fields and subsections are rendered recursively via
 * dump_vmstate_vmsf()/dump_vmstate_vmss().  No trailing newline is
 * printed, so the caller controls separators between siblings.
 */
static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection)
{
    if (is_subsection) {
        fprintf(out_file, "%*s{\n", indent, "");
    } else {
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
    }
    indent += 2;
    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            vmsd->version_id);
    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
            vmsd->minimum_version_id);
    if (vmsd->fields != NULL) {
        const VMStateField *field = vmsd->fields;
        bool first;
        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
        first = true;
        while (field->name != NULL) {
            if (field->flags & VMS_MUST_EXIST) {
                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
                field++;
                continue;
            }
            /* comma before every element except the first */
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmsf(out_file, field, indent + 2);
            field++;
            first = false;
        }
        /* the terminator must be the VMSTATE_END_OF_LIST() marker */
        assert(field->flags == VMS_END);
        fprintf(out_file, "\n%*s]", indent, "");
    }
    if (vmsd->subsections != NULL) {
        const VMStateDescription **subsection = vmsd->subsections;
        bool first;
        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
        first = true;
        while (*subsection != NULL) {
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmss(out_file, subsection, indent + 2);
            subsection++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}
/*
 * Emit the "vmschkmachine" JSON object naming the current machine type,
 * so the dump can be matched against the machine it was taken from.
 */
static void dump_machine_type(FILE *out_file)
{
    MachineClass *mc;

    mc = MACHINE_GET_CLASS(current_machine);

    fprintf(out_file, "  \"vmschkmachine\": {\n");
    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
    fprintf(out_file, "  },\n");
}
/*
 * Dump the vmstate descriptions of every device class that has a vmsd
 * as one JSON document on @out_file, preceded by the machine type.
 *
 * Note: closes @out_file before returning; the caller must not use the
 * stream afterwards.
 */
void dump_vmstate_json_to_file(FILE *out_file)
{
    GSList *list, *elt;
    bool first;

    fprintf(out_file, "{\n");
    dump_machine_type(out_file);

    first = true;
    list = object_class_get_list(TYPE_DEVICE, true);
    for (elt = list; elt; elt = elt->next) {
        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
                                             TYPE_DEVICE);
        const char *name;
        int indent = 2;

        /* Only device classes with migration state are of interest. */
        if (!dc->vmsd) {
            continue;
        }

        if (!first) {
            fprintf(out_file, ",\n");
        }
        name = object_class_get_name(OBJECT_CLASS(dc));
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
        indent += 2;
        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
                dc->vmsd->version_id);
        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
                dc->vmsd->minimum_version_id);

        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);

        fprintf(out_file, "\n%*s}", indent - 2, "");
        first = false;
    }
    fprintf(out_file, "\n}\n");
    fclose(out_file);

    g_slist_free(list);
}
  593. static uint32_t calculate_new_instance_id(const char *idstr)
  594. {
  595. SaveStateEntry *se;
  596. uint32_t instance_id = 0;
  597. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  598. if (strcmp(idstr, se->idstr) == 0
  599. && instance_id <= se->instance_id) {
  600. instance_id = se->instance_id + 1;
  601. }
  602. }
  603. /* Make sure we never loop over without being noticed */
  604. assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
  605. return instance_id;
  606. }
  607. static int calculate_compat_instance_id(const char *idstr)
  608. {
  609. SaveStateEntry *se;
  610. int instance_id = 0;
  611. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  612. if (!se->compat) {
  613. continue;
  614. }
  615. if (strcmp(idstr, se->compat->idstr) == 0
  616. && instance_id <= se->compat->instance_id) {
  617. instance_id = se->compat->instance_id + 1;
  618. }
  619. }
  620. return instance_id;
  621. }
  622. static inline MigrationPriority save_state_priority(SaveStateEntry *se)
  623. {
  624. if (se->vmsd) {
  625. return se->vmsd->priority;
  626. }
  627. return MIG_PRI_DEFAULT;
  628. }
/*
 * Insert @nse into savevm_state.handlers keeping the list ordered by
 * priority (higher priorities first).  handler_pri_head[] caches the
 * first entry of each priority band.  Aborts the process on a duplicate
 * (idstr, instance_id) registration.
 */
static void savevm_state_handler_insert(SaveStateEntry *nse)
{
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *se;
    int i;

    assert(priority <= MIG_PRI_MAX);

    /*
     * This should never happen otherwise migration will probably fail
     * silently somewhere because we can be wrongly applying one
     * object properties upon another one. Bail out ASAP.
     */
    if (find_se(nse->idstr, nse->instance_id)) {
        error_report("%s: Detected duplicate SaveStateEntry: "
                     "id=%s, instance_id=0x%"PRIx32, __func__,
                     nse->idstr, nse->instance_id);
        exit(EXIT_FAILURE);
    }

    /* Find the head of the next occupied, lower-priority band. */
    for (i = priority - 1; i >= 0; i--) {
        se = savevm_state.handler_pri_head[i];
        if (se != NULL) {
            assert(save_state_priority(se) < priority);
            break;
        }
    }

    if (i >= 0) {
        /* Insert in front of the first lower-priority entry. */
        QTAILQ_INSERT_BEFORE(se, nse, entry);
    } else {
        /* No lower-priority entries: new entry goes last. */
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }

    /* First entry of this priority becomes the band's cached head. */
    if (savevm_state.handler_pri_head[priority] == NULL) {
        savevm_state.handler_pri_head[priority] = nse;
    }
}
/*
 * Unlink @se from savevm_state.handlers, moving the cached priority
 * band head (handler_pri_head[]) to the next entry of the same
 * priority, or clearing it when @se was the band's last entry.
 */
static void savevm_state_handler_remove(SaveStateEntry *se)
{
    SaveStateEntry *next;
    MigrationPriority priority = save_state_priority(se);

    if (se == savevm_state.handler_pri_head[priority]) {
        next = QTAILQ_NEXT(se, entry);
        if (next != NULL && save_state_priority(next) == priority) {
            savevm_state.handler_pri_head[priority] = next;
        } else {
            savevm_state.handler_pri_head[priority] = NULL;
        }
    }
    QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
}
/* TODO: Individual devices generally have very little idea about the rest
   of the system, so instance_id should be removed/replaced.
   Meanwhile pass -1 as instance_id if you do not already have a clearly
   distinguishing id for all instances of your device class. */

/*
 * Register an old-style (SaveVMHandlers based) migration handler.
 *
 * @idstr: section name on the wire
 * @instance_id: caller-chosen id, or VMSTATE_INSTANCE_ID_ANY to have
 *               one allocated from the existing registrations
 * @version_id: version stored in the section header
 * @ops: save/load callbacks; must outlive the registration
 * @opaque: passed back to every callback
 *
 * Always returns 0.
 */
int register_savevm_live(const char *idstr,
                         uint32_t instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque)
{
    SaveStateEntry *se;

    se = g_new0(SaveStateEntry, 1);
    se->version_id = version_id;
    se->section_id = savevm_state.global_section_id++;
    se->ops = ops;
    se->opaque = opaque;
    se->vmsd = NULL;
    /* if this is a live savevm handler then set is_ram */
    if (ops->save_setup != NULL) {
        se->is_ram = 1;
    }

    pstrcat(se->idstr, sizeof(se->idstr), idstr);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}
/*
 * Remove and free every handler matching @idstr (prefixed with @obj's
 * id when available, mirroring the registration path) and @opaque.
 */
void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
{
    SaveStateEntry *se, *new_se;
    char id[256] = "";

    /* Rebuild the "<obj-id>/<idstr>" form used at registration time. */
    if (obj) {
        char *oid = vmstate_if_get_id(obj);
        if (oid) {
            pstrcpy(id, sizeof(id), oid);
            pstrcat(id, sizeof(id), "/");
            g_free(oid);
        }
    }
    pstrcat(id, sizeof(id), idstr);

    /* SAFE variant: entries are removed while iterating. */
    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}
/*
 * Perform some basic checks on vmsd's at registration
 * time.
 */
static void vmstate_check(const VMStateDescription *vmsd)
{
    const VMStateField *field = vmsd->fields;
    const VMStateDescription **subsection = vmsd->subsections;

    if (field) {
        while (field->name) {
            if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
                /* Recurse to sub structures */
                vmstate_check(field->vmsd);
            }
            /* Carry on */
            field++;
        }
        /* Check for the end of field list canary */
        if (field->flags != VMS_END) {
            error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
            g_assert_not_reached();
        }
    }

    while (subsection && *subsection) {
        /*
         * The name of a subsection should start with the name of the
         * current object.
         */
        assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
        vmstate_check(*subsection);
        subsection++;
    }
}
/*
 * See comment in hw/intc/xics.c:icp_realize()
 *
 * This function can be removed when
 * pre_2_10_vmstate_register_dummy_icp() is removed.
 *
 * Drops any existing registration for (vmsd->name, instance_id) before
 * registering @vmsd again; returns vmstate_register()'s result.
 */
int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
                                 const VMStateDescription *vmsd,
                                 void *opaque)
{
    SaveStateEntry *se = find_se(vmsd->name, instance_id);

    if (se) {
        savevm_state_handler_remove(se);
    }
    return vmstate_register(obj, instance_id, vmsd, opaque);
}
/*
 * Register a vmsd-based migration handler.
 *
 * When @obj provides an id, the section idstr becomes "<id>/<vmsd name>"
 * and a CompatEntry records the bare vmsd name plus a compat instance id
 * for backwards-compatible stream matching.  Returns 0 on success, -1
 * (with @errp set) when the combined idstr would not fit.
 */
int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *opaque, int alias_id,
                                   int required_for_version,
                                   Error **errp)
{
    SaveStateEntry *se;

    /* If this triggers, alias support can be dropped for the vmsd. */
    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);

    se = g_new0(SaveStateEntry, 1);
    se->version_id = vmsd->version_id;
    se->section_id = savevm_state.global_section_id++;
    se->opaque = opaque;
    se->vmsd = vmsd;
    se->alias_id = alias_id;

    if (obj) {
        char *id = vmstate_if_get_id(obj);
        if (id) {
            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
                sizeof(se->idstr)) {
                error_setg(errp, "Path too long for VMState (%s)", id);
                g_free(id);
                g_free(se);
                return -1;
            }
            g_free(id);

            se->compat = g_new0(CompatEntry, 1);
            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
            se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
                calculate_compat_instance_id(vmsd->name) : instance_id;
            /* The real instance id is now derived from the full idstr. */
            instance_id = VMSTATE_INSTANCE_ID_ANY;
        }
    }
    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }

    /* Perform a recursive sanity check during the test runs */
    if (qtest_enabled()) {
        vmstate_check(vmsd);
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}
/*
 * Remove and free every handler registered with this (@vmsd, @opaque)
 * pair.  @obj is unused here; the match is purely on vmsd and opaque.
 */
void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
                        void *opaque)
{
    SaveStateEntry *se, *new_se;

    /* SAFE variant: entries are removed while iterating. */
    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (se->vmsd == vmsd && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}
  836. static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
  837. {
  838. trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
  839. if (!se->vmsd) { /* Old style */
  840. return se->ops->load_state(f, se->opaque, se->load_version_id);
  841. }
  842. return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
  843. }
/*
 * Save an old-style (ops-based) section and, when @vmdesc is non-NULL,
 * describe it in the JSON device description as one opaque "buffer"
 * field whose size is measured from the stream position delta.
 */
static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                   JSONWriter *vmdesc)
{
    uint64_t old_offset = qemu_file_transferred(f);
    se->ops->save_state(f, se->opaque);
    uint64_t size = qemu_file_transferred(f) - old_offset;

    if (vmdesc) {
        json_writer_int64(vmdesc, "size", size);
        json_writer_start_array(vmdesc, "fields");
        json_writer_start_object(vmdesc, NULL);
        json_writer_str(vmdesc, "name", "data");
        json_writer_int64(vmdesc, "size", size);
        json_writer_str(vmdesc, "type", "buffer");
        json_writer_end_object(vmdesc);
        json_writer_end_array(vmdesc);
    }
}
/*
 * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
 *
 * FULL and START sections additionally carry the identification block
 * (idstr length + bytes, instance_id, version_id); END/PART sections
 * reference the section purely by section_id.
 */
static void save_section_header(QEMUFile *f, SaveStateEntry *se,
                                uint8_t section_type)
{
    qemu_put_byte(f, section_type);
    qemu_put_be32(f, se->section_id);

    if (section_type == QEMU_VM_SECTION_FULL ||
        section_type == QEMU_VM_SECTION_START) {
        /* ID string */
        size_t len = strlen(se->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)se->idstr, len);

        qemu_put_be32(f, se->instance_id);
        qemu_put_be32(f, se->version_id);
    }
}
/*
 * Write a footer onto device sections that catches cases misformatted device
 * sections.
 *
 * Emitted only when the current migration parameters enable section
 * footers; the footer repeats the section_id for cross-checking.
 */
static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    if (migrate_get_current()->send_section_footer) {
        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
        qemu_put_be32(f, se->section_id);
    }
}
/*
 * Save one handler as a QEMU_VM_SECTION_FULL section, recording it in
 * @vmdesc (when non-NULL).  Sections whose vmsd reports "not needed"
 * and handlers with neither a vmsd nor a save_state op are skipped.
 *
 * Returns 0 on success or skip, otherwise the (negative) error from
 * vmstate_save_state_with_err(), which is also propagated to the
 * migration state.
 */
static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
{
    int ret;
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();

    if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
        return 0;
    }
    if (se->vmsd && !vmstate_section_needed(se->vmsd, se->opaque)) {
        trace_savevm_section_skip(se->idstr, se->section_id);
        return 0;
    }

    trace_savevm_section_start(se->idstr, se->section_id);
    save_section_header(f, se, QEMU_VM_SECTION_FULL);
    if (vmdesc) {
        json_writer_start_object(vmdesc, NULL);
        json_writer_str(vmdesc, "name", se->idstr);
        json_writer_int64(vmdesc, "instance_id", se->instance_id);
    }

    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {
        /* Old-style handler: opaque blob described as a buffer field. */
        vmstate_save_old_style(f, se, vmdesc);
    } else {
        ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc,
                                          &local_err);
        if (ret) {
            migrate_set_error(s, local_err);
            error_report_err(local_err);
            return ret;
        }
    }

    trace_savevm_section_end(se->idstr, se->section_id, 0);
    save_section_footer(f, se);
    if (vmdesc) {
        json_writer_end_object(vmdesc);
    }
    return 0;
}
/**
 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 * command and associated data.
 *
 * @f: File to send command on
 * @command: Command type to send
 * @len: Length of associated data
 * @data: Data associated with command.
 *
 * Wire layout: byte QEMU_VM_COMMAND, be16 command, be16 len, len bytes
 * of data; the stream is flushed afterwards.
 */
static void qemu_savevm_command_send(QEMUFile *f,
                                     enum qemu_vm_cmd command,
                                     uint16_t len,
                                     uint8_t *data)
{
    trace_savevm_command_send(command, len);
    qemu_put_byte(f, QEMU_VM_COMMAND);
    qemu_put_be16(f, (uint16_t)command);
    qemu_put_be16(f, len);
    qemu_put_buffer(f, data, len);
    qemu_fflush(f);
}
/* Send the dataless MIG_CMD_ENABLE_COLO command on @f. */
void qemu_savevm_send_colo_enable(QEMUFile *f)
{
    trace_savevm_send_colo_enable();
    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
}
  953. void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
  954. {
  955. uint32_t buf;
  956. trace_savevm_send_ping(value);
  957. buf = cpu_to_be32(value);
  958. qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
  959. }
/* Ask the destination to open its return path (dataless command). */
void qemu_savevm_send_open_return_path(QEMUFile *f)
{
    trace_savevm_send_open_return_path();
    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}
/* We have a buffer of data to send; we don't want that all to be loaded
 * by the command itself, so the command contains just the length of the
 * extra buffer that we then send straight after it.
 * TODO: Must be a better way to organise that
 *
 * Returns:
 *    0 on success
 *    -ve on error
 */
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
{
    uint32_t tmp;
    MigrationState *ms = migrate_get_current();
    Error *local_err = NULL;

    /* Refuse oversized packages; error is also recorded on the
     * migration state so the failure is visible to management. */
    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
        error_setg(&local_err, "%s: Unreasonably large packaged state: %zu",
                   __func__, len);
        migrate_set_error(ms, local_err);
        error_report_err(local_err);
        return -1;
    }

    tmp = cpu_to_be32(len);

    trace_qemu_savevm_send_packaged();
    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);

    /* Payload follows immediately after the command element. */
    qemu_put_buffer(f, buf, len);

    return 0;
}
/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
    if (migrate_postcopy_ram()) {
        /* With postcopy RAM: advise carries the RAM page size summary
         * and target page size as two be64 values (16 bytes). */
        uint64_t tmp[2];
        tmp[0] = cpu_to_be64(ram_pagesize_summary());
        tmp[1] = cpu_to_be64(qemu_target_page_size());
        trace_qemu_savevm_send_postcopy_advise();
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
                                 16, (uint8_t *)tmp);
    } else {
        /* Dataless advise when postcopy RAM is not in use. */
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
    }
}
/* Sent prior to starting the destination running in postcopy, discard pages
 * that have already been sent but redirtied on the source.
 * CMD_POSTCOPY_RAM_DISCARD consist of:
 *      byte   version (0)
 *      byte   Length of name field (not including 0)
 *  n x byte   RAM block name
 *      byte   0 terminator (just for safety)
 *  n x        Byte ranges within the named RAMBlock
 *      be64   Start of the range
 *      be64   Length
 *
 *  name:  RAMBlock name that these entries are part of
 *  len: Number of page entries
 *  start_list: 'len' addresses
 *  length_list: 'len' addresses
 *
 */
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
                                           uint16_t len,
                                           uint64_t *start_list,
                                           uint64_t *length_list)
{
    uint8_t *buf;
    uint16_t tmplen;
    uint16_t t;
    size_t name_len = strlen(name);

    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
    /* Name length must fit the single length byte. */
    assert(name_len < 256);
    /* version + name length + name + NUL + len * (start, length) */
    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
    buf[0] = postcopy_ram_discard_version;
    buf[1] = name_len;
    memcpy(buf + 2, name, name_len);
    tmplen = 2 + name_len;
    buf[tmplen++] = '\0';

    for (t = 0; t < len; t++) {
        stq_be_p(buf + tmplen, start_list[t]);
        tmplen += 8;
        stq_be_p(buf + tmplen, length_list[t]);
        tmplen += 8;
    }
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
    g_free(buf);
}
/* Get the destination into a state where it can receive postcopy data. */
void qemu_savevm_send_postcopy_listen(QEMUFile *f)
{
    trace_savevm_send_postcopy_listen();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
}
/* Kick the destination into running */
void qemu_savevm_send_postcopy_run(QEMUFile *f)
{
    trace_savevm_send_postcopy_run();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
}
/* Tell the destination a postcopy recovery stream is resuming. */
void qemu_savevm_send_postcopy_resume(QEMUFile *f)
{
    trace_savevm_send_postcopy_resume();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
}
  1066. void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
  1067. {
  1068. size_t len;
  1069. char buf[256];
  1070. trace_savevm_send_recv_bitmap(block_name);
  1071. buf[0] = len = strlen(block_name);
  1072. memcpy(buf + 1, block_name, len);
  1073. qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
  1074. }
  1075. bool qemu_savevm_state_blocked(Error **errp)
  1076. {
  1077. SaveStateEntry *se;
  1078. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1079. if (se->vmsd && se->vmsd->unmigratable) {
  1080. error_setg(errp, "State blocked by non-migratable device '%s'",
  1081. se->idstr);
  1082. return true;
  1083. }
  1084. }
  1085. return false;
  1086. }
/*
 * Prepend a "non-migratable device: <idstr>" entry to @reasons for each
 * handler whose vmsd is flagged unmigratable.
 */
void qemu_savevm_non_migratable_list(strList **reasons)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            QAPI_LIST_PREPEND(*reasons,
                              g_strdup_printf("non-migratable device: %s",
                                              se->idstr));
        }
    }
}
/*
 * Write the migration stream header (magic + file version) and, when
 * configuration sending is enabled, the QEMU_VM_CONFIGURATION section.
 * Also allocates the JSON vmdesc writer on the migration state.
 */
void qemu_savevm_state_header(QEMUFile *f)
{
    MigrationState *s = migrate_get_current();

    s->vmdesc = json_writer_new(false);

    trace_savevm_state_header();
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    if (s->send_configuration) {
        qemu_put_byte(f, QEMU_VM_CONFIGURATION);

        /*
         * This starts the main json object and is paired with the
         * json_writer_end_object in
         * qemu_savevm_state_complete_precopy_non_iterable
         */
        json_writer_start_object(s->vmdesc, NULL);

        json_writer_start_object(s->vmdesc, "configuration");
        vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
        json_writer_end_object(s->vmdesc);
    }
}
  1118. bool qemu_savevm_state_guest_unplug_pending(void)
  1119. {
  1120. SaveStateEntry *se;
  1121. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1122. if (se->vmsd && se->vmsd->dev_unplug_pending &&
  1123. se->vmsd->dev_unplug_pending(se->opaque)) {
  1124. return true;
  1125. }
  1126. }
  1127. return false;
  1128. }
/*
 * Call every active handler's save_prepare hook before migration starts.
 * Returns 0 on success or the first hook's negative error (with @errp
 * set by the hook).
 */
int qemu_savevm_state_prepare(Error **errp)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_prepare) {
            continue;
        }
        /* is_active is an optional "currently relevant?" filter. */
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        ret = se->ops->save_prepare(se->opaque, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
/*
 * Run the setup stage of migration: save early-setup vmsd sections in
 * full, and emit a QEMU_VM_SECTION_START (save_setup) for every active
 * iterable handler.  Errors are latched onto @f via
 * qemu_file_set_error(); PRECOPY_NOTIFY_SETUP notifiers run afterwards.
 */
void qemu_savevm_state_setup(QEMUFile *f)
{
    MigrationState *ms = migrate_get_current();
    SaveStateEntry *se;
    Error *local_err = NULL;
    int ret;

    json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
    json_writer_start_array(ms->vmdesc, "devices");

    trace_savevm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->early_setup) {
            /* Early-setup devices are saved completely right now. */
            ret = vmstate_save(f, se, ms->vmdesc);
            if (ret) {
                qemu_file_set_error(f, ret);
                break;
            }
            continue;
        }

        if (!se->ops || !se->ops->save_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        save_section_header(f, se, QEMU_VM_SECTION_START);

        ret = se->ops->save_setup(f, se->opaque);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            break;
        }
    }

    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
        error_report_err(local_err);
    }
}
  1187. int qemu_savevm_state_resume_prepare(MigrationState *s)
  1188. {
  1189. SaveStateEntry *se;
  1190. int ret;
  1191. trace_savevm_state_resume_prepare();
  1192. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1193. if (!se->ops || !se->ops->resume_prepare) {
  1194. continue;
  1195. }
  1196. if (se->ops->is_active) {
  1197. if (!se->ops->is_active(se->opaque)) {
  1198. continue;
  1199. }
  1200. }
  1201. ret = se->ops->resume_prepare(s, se->opaque);
  1202. if (ret < 0) {
  1203. return ret;
  1204. }
  1205. }
  1206. return 0;
  1207. }
/*
 * this function has three return values:
 *   negative: there was one error, and we have -errno.
 *   0 : We haven't finished, caller have to go again
 *   1 : We have finished, we can go to complete phase
 */
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
    SaveStateEntry *se;
    int ret = 1;

    trace_savevm_state_iterate();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_iterate) {
            continue;
        }
        if (se->ops->is_active &&
            !se->ops->is_active(se->opaque)) {
            continue;
        }
        if (se->ops->is_active_iterate &&
            !se->ops->is_active_iterate(se->opaque)) {
            continue;
        }
        /*
         * In the postcopy phase, any device that doesn't know how to
         * do postcopy should have saved it's state in the _complete
         * call that's already run, it might get confused if we call
         * iterate afterwards.
         */
        if (postcopy &&
            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
            continue;
        }
        /* Respect the configured bandwidth limit. */
        if (migration_rate_exceeded(f)) {
            return 0;
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_PART);

        ret = se->ops->save_live_iterate(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);

        if (ret < 0) {
            error_report("failed to save SaveStateEntry with id(name): "
                         "%d(%s): %d",
                         se->section_id, se->idstr, ret);
            qemu_file_set_error(f, ret);
        }
        if (ret <= 0) {
            /* Do not proceed to the next vmstate before this one reported
               completion of the current stage. This serializes the migration
               and reduces the probability that a faster changing state is
               synchronized over and over again. */
            break;
        }
    }
    return ret;
}
  1265. static bool should_send_vmdesc(void)
  1266. {
  1267. MachineState *machine = MACHINE(qdev_get_machine());
  1268. bool in_postcopy = migration_in_postcopy();
  1269. return !machine->suppress_vmdesc && !in_postcopy;
  1270. }
/*
 * Calls the save_live_complete_postcopy methods
 * causing the last few pages to be sent immediately and doing any associated
 * cleanup.
 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
 * all the other devices, but that happens at the point we switch to postcopy.
 */
void qemu_savevm_state_complete_postcopy(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_complete_postcopy) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);
        /* Section type */
        qemu_put_byte(f, QEMU_VM_SECTION_END);
        qemu_put_be32(f, se->section_id);

        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return;
        }
    }

    /* End of stream marker, then flush everything out. */
    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);
}
/*
 * Emit QEMU_VM_SECTION_END (save_live_complete_precopy) for every
 * active iterable handler; in postcopy, handlers that support postcopy
 * are skipped since they finish later.  Per-device downtime is traced.
 * Returns 0 or -1 (error latched on @f).
 */
static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
    int64_t start_ts_each, end_ts_each;
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops ||
            (in_postcopy && se->ops->has_postcopy &&
             se->ops->has_postcopy(se->opaque)) ||
            !se->ops->save_live_complete_precopy) {
            continue;
        }

        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_END);

        ret = se->ops->save_live_complete_precopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return -1;
        }
        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
                                    end_ts_each - start_ts_each);
    }

    trace_vmstate_downtime_checkpoint("src-iterable-saved");

    return 0;
}
/*
 * Save all remaining (non-iterable) device state, optionally inactivate
 * block devices, write QEMU_VM_EOF (precopy only) and append the JSON
 * vmdesc blob.  Closes and frees ms->vmdesc.  Returns 0 or a negative
 * error, which is also latched onto @f.
 */
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                    bool in_postcopy,
                                                    bool inactivate_disks)
{
    MigrationState *ms = migrate_get_current();
    int64_t start_ts_each, end_ts_each;
    JSONWriter *vmdesc = ms->vmdesc;
    int vmdesc_len;
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->early_setup) {
            /* Already saved during qemu_savevm_state_setup(). */
            continue;
        }

        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);

        ret = vmstate_save(f, se, vmdesc);
        if (ret) {
            qemu_file_set_error(f, ret);
            return ret;
        }

        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
                                    end_ts_each - start_ts_each);
    }

    if (inactivate_disks) {
        /* Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_activate_all() on the other end won't fail. */
        ret = bdrv_inactivate_all();
        if (ret) {
            Error *local_err = NULL;
            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
                       __func__, ret);
            migrate_set_error(ms, local_err);
            error_report_err(local_err);
            qemu_file_set_error(f, ret);
            return ret;
        }
    }
    if (!in_postcopy) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    /* Close "devices" array and the main object opened in
     * qemu_savevm_state_header(). */
    json_writer_end_array(vmdesc);
    json_writer_end_object(vmdesc);
    vmdesc_len = strlen(json_writer_get(vmdesc));

    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, vmdesc_len);
        qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
    }

    /* Free it now to detect any inconsistencies. */
    json_writer_free(vmdesc);
    ms->vmdesc = NULL;

    trace_vmstate_downtime_checkpoint("src-non-iterable-saved");

    return 0;
}
/*
 * Complete the precopy phase: sync CPU state, finish the iterable
 * handlers (unless already in postcopy with @iterable_only false paths)
 * and, unless @iterable_only, the non-iterable ones too, then flush @f.
 * Returns 0 or a negative error.
 */
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                       bool inactivate_disks)
{
    int ret;
    Error *local_err = NULL;
    bool in_postcopy = migration_in_postcopy();

    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_complete_precopy();

    cpu_synchronize_all_states();

    if (!in_postcopy || iterable_only) {
        ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
        if (ret) {
            return ret;
        }
    }

    if (iterable_only) {
        goto flush;
    }

    ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
                                                          inactivate_disks);
    if (ret) {
        return ret;
    }

flush:
    return qemu_fflush(f);
}
  1426. /* Give an estimate of the amount left to be transferred,
  1427. * the result is split into the amount for units that can and
  1428. * for units that can't do postcopy.
  1429. */
  1430. void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
  1431. uint64_t *can_postcopy)
  1432. {
  1433. SaveStateEntry *se;
  1434. *must_precopy = 0;
  1435. *can_postcopy = 0;
  1436. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1437. if (!se->ops || !se->ops->state_pending_estimate) {
  1438. continue;
  1439. }
  1440. if (se->ops->is_active) {
  1441. if (!se->ops->is_active(se->opaque)) {
  1442. continue;
  1443. }
  1444. }
  1445. se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
  1446. }
  1447. }
  1448. void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
  1449. uint64_t *can_postcopy)
  1450. {
  1451. SaveStateEntry *se;
  1452. *must_precopy = 0;
  1453. *can_postcopy = 0;
  1454. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1455. if (!se->ops || !se->ops->state_pending_exact) {
  1456. continue;
  1457. }
  1458. if (se->ops->is_active) {
  1459. if (!se->ops->is_active(se->opaque)) {
  1460. continue;
  1461. }
  1462. }
  1463. se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
  1464. }
  1465. }
  1466. void qemu_savevm_state_cleanup(void)
  1467. {
  1468. SaveStateEntry *se;
  1469. Error *local_err = NULL;
  1470. if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
  1471. error_report_err(local_err);
  1472. }
  1473. trace_savevm_state_cleanup();
  1474. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1475. if (se->ops && se->ops->save_cleanup) {
  1476. se->ops->save_cleanup(se->opaque);
  1477. }
  1478. }
  1479. }
  1480. static int qemu_savevm_state(QEMUFile *f, Error **errp)
  1481. {
  1482. int ret;
  1483. MigrationState *ms = migrate_get_current();
  1484. MigrationStatus status;
  1485. if (migration_is_running(ms->state)) {
  1486. error_setg(errp, QERR_MIGRATION_ACTIVE);
  1487. return -EINVAL;
  1488. }
  1489. if (migrate_block()) {
  1490. error_setg(errp, "Block migration and snapshots are incompatible");
  1491. return -EINVAL;
  1492. }
  1493. ret = migrate_init(ms, errp);
  1494. if (ret) {
  1495. return ret;
  1496. }
  1497. ms->to_dst_file = f;
  1498. qemu_savevm_state_header(f);
  1499. qemu_savevm_state_setup(f);
  1500. while (qemu_file_get_error(f) == 0) {
  1501. if (qemu_savevm_state_iterate(f, false) > 0) {
  1502. break;
  1503. }
  1504. }
  1505. ret = qemu_file_get_error(f);
  1506. if (ret == 0) {
  1507. qemu_savevm_state_complete_precopy(f, false, false);
  1508. ret = qemu_file_get_error(f);
  1509. }
  1510. qemu_savevm_state_cleanup();
  1511. if (ret != 0) {
  1512. error_setg_errno(errp, -ret, "Error while writing VM state");
  1513. }
  1514. if (ret != 0) {
  1515. status = MIGRATION_STATUS_FAILED;
  1516. } else {
  1517. status = MIGRATION_STATUS_COMPLETED;
  1518. }
  1519. migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
  1520. /* f is outer parameter, it should not stay in global migration state after
  1521. * this function finished */
  1522. ms->to_dst_file = NULL;
  1523. return ret;
  1524. }
/*
 * Complete only the iterable (live) device state on @f and terminate the
 * stream.  Used by COLO, which saves the non-iterable device state
 * separately.
 */
void qemu_savevm_live_state(QEMUFile *f)
{
    /* save QEMU_VM_SECTION_END section */
    qemu_savevm_state_complete_precopy(f, true, false);
    qemu_put_byte(f, QEMU_VM_EOF);
}
  1531. int qemu_save_device_state(QEMUFile *f)
  1532. {
  1533. SaveStateEntry *se;
  1534. if (!migration_in_colo_state()) {
  1535. qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
  1536. qemu_put_be32(f, QEMU_VM_FILE_VERSION);
  1537. }
  1538. cpu_synchronize_all_states();
  1539. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1540. int ret;
  1541. if (se->is_ram) {
  1542. continue;
  1543. }
  1544. ret = vmstate_save(f, se, NULL);
  1545. if (ret) {
  1546. return ret;
  1547. }
  1548. }
  1549. qemu_put_byte(f, QEMU_VM_EOF);
  1550. return qemu_file_get_error(f);
  1551. }
  1552. static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
  1553. {
  1554. SaveStateEntry *se;
  1555. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  1556. if (!strcmp(se->idstr, idstr) &&
  1557. (instance_id == se->instance_id ||
  1558. instance_id == se->alias_id))
  1559. return se;
  1560. /* Migrating from an older version? */
  1561. if (strstr(se->idstr, idstr) && se->compat) {
  1562. if (!strcmp(se->compat->idstr, idstr) &&
  1563. (instance_id == se->compat->instance_id ||
  1564. instance_id == se->alias_id))
  1565. return se;
  1566. }
  1567. }
  1568. return NULL;
  1569. }
/* Positive return codes used by the loadvm loops (negative values are
 * plain errors). */
enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT = 1,
};
/* ------ incoming postcopy messages ------ */
/* 'advise' arrives before any transfers just to tell us that a postcopy
 * *might* happen - it might be skipped if precopy transferred everything
 * quickly.
 */
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
                                         uint16_t len)
{
    /* Move to ADVISE state immediately; 'ps' holds the previous state so
     * we can validate the transition below. */
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
    size_t page_size = qemu_target_page_size();
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_advise();
    if (ps != POSTCOPY_INCOMING_NONE) {
        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
        return -1;
    }

    /* Payload is either empty (no RAM postcopy) or two be64 values:
     * remote page-size summary and remote target page size. */
    switch (len) {
    case 0:
        if (migrate_postcopy_ram()) {
            error_report("RAM postcopy is enabled but have 0 byte advise");
            return -EINVAL;
        }
        return 0;
    case 8 + 8:
        if (!migrate_postcopy_ram()) {
            error_report("RAM postcopy is disabled but have 16 byte advise");
            return -EINVAL;
        }
        break;
    default:
        error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
        return -EINVAL;
    }

    if (!postcopy_ram_supported_by_host(mis, &local_err)) {
        error_report_err(local_err);
        /* Roll the postcopy state back since advise failed */
        postcopy_state_set(POSTCOPY_INCOMING_NONE);
        return -1;
    }

    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
    local_pagesize_summary = ram_pagesize_summary();

    if (remote_pagesize_summary != local_pagesize_summary) {
        /*
         * This detects two potential causes of mismatch:
         *   a) A mismatch in host page sizes
         *      Some combinations of mismatch are probably possible but it
         *      gets a bit more complicated. In particular we need to place
         *      whole host pages on the dest at once, and we need to ensure
         *      that we handle dirtying to make sure we never end up sending
         *      part of a hostpage on it's own.
         *   b) The use of different huge page sizes on source/destination
         *      a more fine grain test is performed during RAM block migration
         *      but this test here causes a nice early clear failure, and
         *      also fails when passed to an older qemu that doesn't
         *      do huge pages.
         */
        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
                     " d=%" PRIx64 ")",
                     remote_pagesize_summary, local_pagesize_summary);
        return -1;
    }

    remote_tps = qemu_get_be64(mis->from_src_file);
    if (remote_tps != page_size) {
        /*
         * Again, some differences could be dealt with, but for now keep it
         * simple.
         */
        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
                     (int)remote_tps, page_size);
        return -1;
    }

    /* A notifier veto (e.g. from a device incompatible with postcopy)
     * aborts the advise. */
    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    if (ram_postcopy_incoming_init(mis)) {
        return -1;
    }

    return 0;
}
/* After postcopy we will be told to throw some pages away since they're
 * dirty and will have to be demand fetched. Must happen before CPU is
 * started.
 * There can be 0..many of these messages, each encoding multiple pages.
 */
static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
                                              uint16_t len)
{
    int tmp;
    char ramid[256];
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_ram_handle_discard();

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
        /* 1st discard */
        tmp = postcopy_ram_prepare_discard(mis);
        if (tmp) {
            return tmp;
        }
        break;

    case POSTCOPY_INCOMING_DISCARD:
        /* Expected state */
        break;

    default:
        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
                     ps);
        return -1;
    }
    /* We're expecting a
     *    Version (0)
     *    a RAM ID string (length byte, name, 0 term)
     *    then at least 1 16 byte chunk
     */
    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }

    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != postcopy_ram_discard_version) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
        return -1;
    }

    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
        return -1;
    }
    /* The RAM ID string is followed by a terminating nil byte */
    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != 0) {
        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
        return -1;
    }

    /* Strip the version byte, length byte, name and nil terminator; the
     * remainder must be an exact multiple of 16-byte (start, length)
     * pairs. */
    len -= 3 + strlen(ramid);
    if (len % 16) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }
    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
    while (len) {
        uint64_t start_addr, block_length;
        start_addr = qemu_get_be64(mis->from_src_file);
        block_length = qemu_get_be64(mis->from_src_file);

        len -= 16;
        int ret = ram_discard_range(ramid, start_addr, block_length);
        if (ret) {
            return ret;
        }
    }
    trace_loadvm_postcopy_ram_handle_discard_end();

    return 0;
}
/*
 * Triggered by a postcopy_listen command; this thread takes over reading
 * the input stream, leaving the main thread free to carry on loading the rest
 * of the device state (from RAM).
 * (TODO:This could do with being in a postcopy file - but there again it's
 * just another input loop, not that postcopy specific)
 */
static void *postcopy_ram_listen_thread(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    QEMUFile *f = mis->from_src_file;
    int load_res;
    MigrationState *migr = migrate_get_current();

    /* Keep the MigrationState object alive for the thread's lifetime */
    object_ref(OBJECT(migr));

    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
    /* Signal postcopy_thread_create() that we have started */
    qemu_sem_post(&mis->thread_sync_sem);
    trace_postcopy_ram_listen_thread_start();

    rcu_register_thread();
    /*
     * Because we're a thread and not a coroutine we can't yield
     * in qemu_file, and thus we must be blocking now.
     */
    qemu_file_set_blocking(f, true);
    load_res = qemu_loadvm_state_main(f, mis);

    /*
     * This is tricky, but, mis->from_src_file can change after it
     * returns, when postcopy recovery happened. In the future, we may
     * want a wrapper for the QEMUFile handle.
     */
    f = mis->from_src_file;

    /* And non-blocking again so we don't block in any cleanup */
    qemu_file_set_blocking(f, false);

    trace_postcopy_ram_listen_thread_exit();
    if (load_res < 0) {
        qemu_file_set_error(f, load_res);
        dirty_bitmap_mig_cancel_incoming();
        /* Dirty-bitmap-only postcopy failures are tolerated: all other
         * state has already arrived, so keep running instead of exiting. */
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            !migrate_postcopy_ram() && migrate_dirty_bitmaps())
        {
            error_report("%s: loadvm failed during postcopy: %d. All states "
                         "are migrated except dirty bitmaps. Some dirty "
                         "bitmaps may be lost, and present migrated dirty "
                         "bitmaps are correctly migrated and valid.",
                         __func__, load_res);
            load_res = 0; /* prevent further exit() */
        } else {
            error_report("%s: loadvm failed: %d", __func__, load_res);
            migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                                           MIGRATION_STATUS_FAILED);
        }
    }
    if (load_res >= 0) {
        /*
         * This looks good, but it's possible that the device loading in the
         * main thread hasn't finished yet, and so we might not be in 'RUN'
         * state yet; wait for the end of the main thread.
         */
        qemu_event_wait(&mis->main_thread_load_event);
    }
    postcopy_ram_incoming_cleanup(mis);

    if (load_res < 0) {
        /*
         * If something went wrong then we have a bad state so exit;
         * depending how far we got it might be possible at this point
         * to leave the guest running and fire MCEs for pages that never
         * arrived as a desperate recovery step.
         */
        rcu_unregister_thread();
        exit(EXIT_FAILURE);
    }

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                                   MIGRATION_STATUS_COMPLETED);
    /*
     * If everything has worked fine, then the main thread has waited
     * for us to start, and we're the last use of the mis.
     * (If something broke then qemu will have to exit anyway since it's
     * got a bad migration state).
     */
    migration_incoming_state_destroy();
    qemu_loadvm_state_cleanup();

    rcu_unregister_thread();
    mis->have_listen_thread = false;
    postcopy_state_set(POSTCOPY_INCOMING_END);

    /* Drop the reference taken at thread start */
    object_unref(OBJECT(migr));

    return NULL;
}
  1811. /* After this message we must be able to immediately receive postcopy data */
  1812. static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
  1813. {
  1814. PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
  1815. Error *local_err = NULL;
  1816. trace_loadvm_postcopy_handle_listen("enter");
  1817. if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
  1818. error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
  1819. return -1;
  1820. }
  1821. if (ps == POSTCOPY_INCOMING_ADVISE) {
  1822. /*
  1823. * A rare case, we entered listen without having to do any discards,
  1824. * so do the setup that's normally done at the time of the 1st discard.
  1825. */
  1826. if (migrate_postcopy_ram()) {
  1827. postcopy_ram_prepare_discard(mis);
  1828. }
  1829. }
  1830. trace_loadvm_postcopy_handle_listen("after discard");
  1831. /*
  1832. * Sensitise RAM - can now generate requests for blocks that don't exist
  1833. * However, at this point the CPU shouldn't be running, and the IO
  1834. * shouldn't be doing anything yet so don't actually expect requests
  1835. */
  1836. if (migrate_postcopy_ram()) {
  1837. if (postcopy_ram_incoming_setup(mis)) {
  1838. postcopy_ram_incoming_cleanup(mis);
  1839. return -1;
  1840. }
  1841. }
  1842. trace_loadvm_postcopy_handle_listen("after uffd");
  1843. if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
  1844. error_report_err(local_err);
  1845. return -1;
  1846. }
  1847. mis->have_listen_thread = true;
  1848. postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen",
  1849. postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
  1850. trace_loadvm_postcopy_handle_listen("return");
  1851. return 0;
  1852. }
/*
 * Bottom half scheduled by loadvm_postcopy_handle_run(): performs the
 * destination-side "start running" work in the main loop context.
 */
static void loadvm_postcopy_handle_run_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");

    /* TODO we should move all of this lot into postcopy_ram.c or a shared code
     * in migration.c
     */
    cpu_synchronize_all_post_init();

    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");

    /* Announce our new network location so packets get re-routed here */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");

    /* Make sure all file formats throw away their mutable metadata.
     * If we get an error here, just don't restart the VM yet. */
    bdrv_activate_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
        autostart = false;
    }

    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");

    dirty_bitmap_mig_before_vm_start();

    if (autostart) {
        /* Hold onto your hats, starting the CPU */
        vm_start();
    } else {
        /* leave it paused and let management decide when to start the CPU */
        runstate_set(RUN_STATE_PAUSED);
    }

    /* One-shot bottom half: delete it now that it has run */
    qemu_bh_delete(mis->bh);

    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
}
  1885. /* After all discards we can start running and asking for pages */
  1886. static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
  1887. {
  1888. PostcopyState ps = postcopy_state_get();
  1889. trace_loadvm_postcopy_handle_run();
  1890. if (ps != POSTCOPY_INCOMING_LISTENING) {
  1891. error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
  1892. return -1;
  1893. }
  1894. postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
  1895. mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
  1896. qemu_bh_schedule(mis->bh);
  1897. /* We need to finish reading the stream from the package
  1898. * and also stop reading anything more from the stream that loaded the
  1899. * package (since it's now being read by the listener thread).
  1900. * LOADVM_QUIT will quit all the layers of nested loadvm loops.
  1901. */
  1902. return LOADVM_QUIT;
  1903. }
/* We must be with page_request_mutex held */
/*
 * GTraverseFunc run over mis->page_requested: re-send a page request to the
 * source for one outstanding faulted address.  Always returns FALSE so the
 * tree traversal visits every entry.
 */
static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
                                       gpointer data)
{
    MigrationIncomingState *mis = data;
    void *host_addr = (void *) key;
    ram_addr_t rb_offset;
    RAMBlock *rb;
    int ret;

    rb = qemu_ram_block_from_host(host_addr, true, &rb_offset);
    if (!rb) {
        /*
         * This should _never_ happen. However be nice for a migrating VM to
         * not crash/assert. Post an error (note: intended to not use *_once
         * because we do want to see all the illegal addresses; and this can
         * never be triggered by the guest so we're safe) and move on next.
         */
        error_report("%s: illegal host addr %p", __func__, host_addr);
        /* Try the next entry */
        return FALSE;
    }

    ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset);
    if (ret) {
        /* Please refer to above comment. */
        error_report("%s: send rp message failed for addr %p",
                     __func__, host_addr);
        return FALSE;
    }

    trace_postcopy_page_req_sync(host_addr);

    return FALSE;
}
/*
 * Re-send page requests for every address currently pending in
 * mis->page_requested; the traversal runs with page_request_mutex held,
 * as postcopy_sync_page_req() requires.
 */
static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
{
    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
    }
}
/*
 * Handle MIG_CMD_POSTCOPY_RESUME: the source has reconnected after an
 * interrupted postcopy and is ready to continue.  Re-syncs outstanding
 * page requests and releases the paused fault threads.
 */
static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: illegal resume received", __func__);
        /* Don't fail the load, only for this. */
        return 0;
    }

    /*
     * Reset the last_rb before we resend any page req to source again, since
     * the source should have it reset already.
     */
    mis->last_rb = NULL;

    /*
     * This means source VM is ready to resume the postcopy migration.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    trace_loadvm_postcopy_handle_resume();

    /* Tell source that "we are ready" */
    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);

    /*
     * After a postcopy recovery, the source should have lost the postcopy
     * queue, or potentially the requested pages could have been lost during
     * the network down phase.  Let's re-sync with the source VM by re-sending
     * all the pending pages that we eagerly need, so these threads won't get
     * blocked too long due to the recovery.
     *
     * Without this procedure, the faulted destination VM threads (waiting for
     * page requests right before the postcopy is interrupted) can keep hanging
     * until the pages are sent by the source during the background copying of
     * pages, or another thread faulted on the same address accidentally.
     */
    migrate_send_rp_req_pages_pending(mis);

    /*
     * It's time to switch state and release the fault thread to continue
     * service page faults.  Note that this should be explicitly after the
     * above call to migrate_send_rp_req_pages_pending().  In short:
     * migrate_send_rp_message_req_pages() is not thread safe, yet.
     */
    qemu_sem_post(&mis->postcopy_pause_sem_fault);

    if (migrate_postcopy_preempt()) {
        /*
         * The preempt channel will be created in async manner, now let's
         * wait for it and make sure it's created.
         */
        qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
        assert(mis->postcopy_qemufile_dst);
        /* Kick the fast ram load thread too */
        qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
    }

    return 0;
}
/**
 * Immediately following this command is a blob of data containing an embedded
 * chunk of migration stream; read it and load it.
 *
 * The blob's length is read from the stream itself (a be32 immediately
 * after the command), bounded by MAX_VM_CMD_PACKAGED_SIZE.
 *
 * @mis: Incoming state
 *
 * Returns: Negative values on error
 *
 */
static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
{
    int ret;
    size_t length;
    QIOChannelBuffer *bioc;

    length = qemu_get_be32(mis->from_src_file);
    trace_loadvm_handle_cmd_packaged(length);

    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("Unreasonably large packaged state: %zu", length);
        return -1;
    }

    bioc = qio_channel_buffer_new(length);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
    ret = qemu_get_buffer(mis->from_src_file,
                          bioc->data,
                          length);
    if (ret != length) {
        object_unref(OBJECT(bioc));
        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
                     ret, length);
        return (ret < 0) ? ret : -EAGAIN;
    }
    bioc->usage += length;
    trace_loadvm_handle_cmd_packaged_received(ret);

    /* Replay the buffered chunk through the normal loadvm machinery */
    QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));

    ret = qemu_loadvm_state_main(packf, mis);
    trace_loadvm_handle_cmd_packaged_main(ret);
    qemu_fclose(packf);
    object_unref(OBJECT(bioc));

    return ret;
}
  2034. /*
  2035. * Handle request that source requests for recved_bitmap on
  2036. * destination. Payload format:
  2037. *
  2038. * len (1 byte) + ramblock_name (<255 bytes)
  2039. */
  2040. static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
  2041. uint16_t len)
  2042. {
  2043. QEMUFile *file = mis->from_src_file;
  2044. RAMBlock *rb;
  2045. char block_name[256];
  2046. size_t cnt;
  2047. cnt = qemu_get_counted_string(file, block_name);
  2048. if (!cnt) {
  2049. error_report("%s: failed to read block name", __func__);
  2050. return -EINVAL;
  2051. }
  2052. /* Validate before using the data */
  2053. if (qemu_file_get_error(file)) {
  2054. return qemu_file_get_error(file);
  2055. }
  2056. if (len != cnt + 1) {
  2057. error_report("%s: invalid payload length (%d)", __func__, len);
  2058. return -EINVAL;
  2059. }
  2060. rb = qemu_ram_block_by_name(block_name);
  2061. if (!rb) {
  2062. error_report("%s: block '%s' not found", __func__, block_name);
  2063. return -EINVAL;
  2064. }
  2065. migrate_send_rp_recv_bitmap(mis, block_name);
  2066. trace_loadvm_handle_recv_bitmap(block_name);
  2067. return 0;
  2068. }
  2069. static int loadvm_process_enable_colo(MigrationIncomingState *mis)
  2070. {
  2071. int ret = migration_incoming_enable_colo();
  2072. if (!ret) {
  2073. ret = colo_init_ram_cache();
  2074. if (ret) {
  2075. migration_incoming_disable_colo();
  2076. }
  2077. }
  2078. return ret;
  2079. }
/*
 * Process an incoming 'QEMU_VM_COMMAND'
 * 0           just a normal return
 * LOADVM_QUIT All good, but exit the loop
 * <0          Error
 */
static int loadvm_process_command(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    uint16_t cmd;
    uint16_t len;
    uint32_t tmp32;

    cmd = qemu_get_be16(f);
    len = qemu_get_be16(f);

    /* Check validity before continue processing of cmds */
    if (qemu_file_get_error(f)) {
        return qemu_file_get_error(f);
    }

    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
        return -EINVAL;
    }

    trace_loadvm_process_command(mig_cmd_args[cmd].name, len);

    /* Commands with a fixed payload size (len != -1) must match exactly */
    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
        error_report("%s received with bad length - expecting %zu, got %d",
                     mig_cmd_args[cmd].name,
                     (size_t)mig_cmd_args[cmd].len, len);
        return -ERANGE;
    }

    switch (cmd) {
    case MIG_CMD_OPEN_RETURN_PATH:
        if (mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
            /* Not really a problem, so don't give up */
            return 0;
        }
        mis->to_src_file = qemu_file_get_return_path(f);
        if (!mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH failed");
            return -1;
        }

        /*
         * Switchover ack is enabled but no device uses it, so send an ACK to
         * source that it's OK to switchover. Do it here, after return path has
         * been created.
         */
        if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) {
            int ret = migrate_send_rp_switchover_ack(mis);
            if (ret) {
                error_report(
                    "Could not send switchover ack RP MSG, err %d (%s)", ret,
                    strerror(-ret));
                return ret;
            }
        }
        break;

    case MIG_CMD_PING:
        tmp32 = qemu_get_be32(f);
        trace_loadvm_process_command_ping(tmp32);
        if (!mis->to_src_file) {
            error_report("CMD_PING (0x%x) received with no return path",
                         tmp32);
            return -1;
        }
        /* Echo the ping value back on the return path */
        migrate_send_rp_pong(mis, tmp32);
        break;

    case MIG_CMD_PACKAGED:
        return loadvm_handle_cmd_packaged(mis);

    case MIG_CMD_POSTCOPY_ADVISE:
        return loadvm_postcopy_handle_advise(mis, len);

    case MIG_CMD_POSTCOPY_LISTEN:
        return loadvm_postcopy_handle_listen(mis);

    case MIG_CMD_POSTCOPY_RUN:
        return loadvm_postcopy_handle_run(mis);

    case MIG_CMD_POSTCOPY_RAM_DISCARD:
        return loadvm_postcopy_ram_handle_discard(mis, len);

    case MIG_CMD_POSTCOPY_RESUME:
        return loadvm_postcopy_handle_resume(mis);

    case MIG_CMD_RECV_BITMAP:
        return loadvm_handle_recv_bitmap(mis, len);

    case MIG_CMD_ENABLE_COLO:
        return loadvm_process_enable_colo(mis);
    }

    return 0;
}
  2165. /*
  2166. * Read a footer off the wire and check that it matches the expected section
  2167. *
  2168. * Returns: true if the footer was good
  2169. * false if there is a problem (and calls error_report to say why)
  2170. */
  2171. static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
  2172. {
  2173. int ret;
  2174. uint8_t read_mark;
  2175. uint32_t read_section_id;
  2176. if (!migrate_get_current()->send_section_footer) {
  2177. /* No footer to check */
  2178. return true;
  2179. }
  2180. read_mark = qemu_get_byte(f);
  2181. ret = qemu_file_get_error(f);
  2182. if (ret) {
  2183. error_report("%s: Read section footer failed: %d",
  2184. __func__, ret);
  2185. return false;
  2186. }
  2187. if (read_mark != QEMU_VM_SECTION_FOOTER) {
  2188. error_report("Missing section footer for %s", se->idstr);
  2189. return false;
  2190. }
  2191. read_section_id = qemu_get_be32(f);
  2192. if (read_section_id != se->load_section_id) {
  2193. error_report("Mismatched section id in footer for %s -"
  2194. " read 0x%x expected 0x%x",
  2195. se->idstr, read_section_id, se->load_section_id);
  2196. return false;
  2197. }
  2198. /* All good */
  2199. return true;
  2200. }
/*
 * Load a QEMU_VM_SECTION_START or QEMU_VM_SECTION_FULL section: read the
 * section header (id, idstr, instance id, version id), find and validate
 * the matching handler, then load the device state and check the footer.
 * Downtime tracing is only done for FULL sections.
 */
static int
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
                               uint8_t type)
{
    bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
    uint32_t instance_id, version_id, section_id;
    int64_t start_ts, end_ts;
    SaveStateEntry *se;
    char idstr[256];
    int ret;

    /* Read section start */
    section_id = qemu_get_be32(f);
    if (!qemu_get_counted_string(f, idstr)) {
        error_report("Unable to read ID string for section %u",
                     section_id);
        return -EINVAL;
    }
    instance_id = qemu_get_be32(f);
    version_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read instance/version ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
            instance_id, version_id);
    /* Find savevm section */
    se = find_se(idstr, instance_id);
    if (se == NULL) {
        error_report("Unknown savevm section or instance '%s' %"PRIu32". "
                     "Make sure that your current VM setup matches your "
                     "saved VM setup, including any hotplugged devices",
                     idstr, instance_id);
        return -EINVAL;
    }

    /* Validate version */
    if (version_id > se->version_id) {
        error_report("savevm: unsupported version %d for '%s' v%d",
                     version_id, idstr, se->version_id);
        return -EINVAL;
    }
    /* Remember what the stream sent so later END sections and the footer
     * can be matched against it */
    se->load_version_id = version_id;
    se->load_section_id = section_id;

    /* Validate if it is a device's state */
    if (xen_enabled() && se->is_ram) {
        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
        return -EINVAL;
    }

    if (trace_downtime) {
        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state for instance 0x%"PRIx32" of"
                     " device '%s'", instance_id, idstr);
        return ret;
    }

    if (trace_downtime) {
        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_load("non-iterable", se->idstr,
                                    se->instance_id, end_ts - start_ts);
    }

    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}
/*
 * Load a QEMU_VM_SECTION_PART or QEMU_VM_SECTION_END section: these carry
 * only the section id, which must match a handler already registered by a
 * preceding START/FULL section.  Downtime tracing is only done for END
 * sections.
 */
static int
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
                             uint8_t type)
{
    bool trace_downtime = (type == QEMU_VM_SECTION_END);
    int64_t start_ts, end_ts;
    uint32_t section_id;
    SaveStateEntry *se;
    int ret;

    section_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read section ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_partend(section_id);
    /* Match the id against what a previous START section recorded */
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->load_section_id == section_id) {
            break;
        }
    }
    if (se == NULL) {
        error_report("Unknown savevm section %d", section_id);
        return -EINVAL;
    }

    if (trace_downtime) {
        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state section id %d(%s)",
                     section_id, se->idstr);
        return ret;
    }

    if (trace_downtime) {
        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
        trace_vmstate_downtime_load("iterable", se->idstr,
                                    se->instance_id, end_ts - start_ts);
    }

    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}
  2314. static int qemu_loadvm_state_header(QEMUFile *f)
  2315. {
  2316. unsigned int v;
  2317. int ret;
  2318. v = qemu_get_be32(f);
  2319. if (v != QEMU_VM_FILE_MAGIC) {
  2320. error_report("Not a migration stream");
  2321. return -EINVAL;
  2322. }
  2323. v = qemu_get_be32(f);
  2324. if (v == QEMU_VM_FILE_VERSION_COMPAT) {
  2325. error_report("SaveVM v2 format is obsolete and don't work anymore");
  2326. return -ENOTSUP;
  2327. }
  2328. if (v != QEMU_VM_FILE_VERSION) {
  2329. error_report("Unsupported migration stream version");
  2330. return -ENOTSUP;
  2331. }
  2332. if (migrate_get_current()->send_configuration) {
  2333. if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
  2334. error_report("Configuration section missing");
  2335. qemu_loadvm_state_cleanup();
  2336. return -EINVAL;
  2337. }
  2338. ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
  2339. if (ret) {
  2340. qemu_loadvm_state_cleanup();
  2341. return ret;
  2342. }
  2343. }
  2344. return 0;
  2345. }
  2346. static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
  2347. {
  2348. SaveStateEntry *se;
  2349. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  2350. if (!se->ops || !se->ops->switchover_ack_needed) {
  2351. continue;
  2352. }
  2353. if (se->ops->switchover_ack_needed(se->opaque)) {
  2354. mis->switchover_ack_pending_num++;
  2355. }
  2356. }
  2357. trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
  2358. }
  2359. static int qemu_loadvm_state_setup(QEMUFile *f)
  2360. {
  2361. SaveStateEntry *se;
  2362. int ret;
  2363. trace_loadvm_state_setup();
  2364. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  2365. if (!se->ops || !se->ops->load_setup) {
  2366. continue;
  2367. }
  2368. if (se->ops->is_active) {
  2369. if (!se->ops->is_active(se->opaque)) {
  2370. continue;
  2371. }
  2372. }
  2373. ret = se->ops->load_setup(f, se->opaque);
  2374. if (ret < 0) {
  2375. qemu_file_set_error(f, ret);
  2376. error_report("Load state of device %s failed", se->idstr);
  2377. return ret;
  2378. }
  2379. }
  2380. return 0;
  2381. }
  2382. void qemu_loadvm_state_cleanup(void)
  2383. {
  2384. SaveStateEntry *se;
  2385. trace_loadvm_state_cleanup();
  2386. QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
  2387. if (se->ops && se->ops->load_cleanup) {
  2388. se->ops->load_cleanup(se->opaque);
  2389. }
  2390. }
  2391. }
/*
 * Pause an interrupted incoming postcopy migration and block until the
 * user initiates a recovery (which flips mis->state away from
 * POSTCOPY_PAUSED and posts postcopy_pause_sem_dst).
 *
 * Return true if we should continue the migration, or false.
 */
static bool postcopy_pause_incoming(MigrationIncomingState *mis)
{
    int i;

    trace_postcopy_pause_incoming();

    /* Only RAM postcopy supports pause/recover */
    assert(migrate_postcopy_ram());

    /*
     * Unregister yank with either from/to src would work, since ioc behind it
     * is the same
     */
    migration_ioc_unregister_yank_from_file(mis->from_src_file);

    assert(mis->from_src_file);
    qemu_file_shutdown(mis->from_src_file);
    qemu_fclose(mis->from_src_file);
    mis->from_src_file = NULL;

    assert(mis->to_src_file);
    /* Shut down first, then close under rp_mutex so no concurrent writer
     * on the return path races with the fclose */
    qemu_file_shutdown(mis->to_src_file);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_fclose(mis->to_src_file);
    mis->to_src_file = NULL;
    qemu_mutex_unlock(&mis->rp_mutex);

    /*
     * NOTE: this must happen before reset the PostcopyTmpPages below,
     * otherwise it's racy to reset those fields when the fast load thread
     * can be accessing it in parallel.
     */
    if (mis->postcopy_qemufile_dst) {
        qemu_file_shutdown(mis->postcopy_qemufile_dst);
        /* Take the mutex to make sure the fast ram load thread halted */
        qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
        qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
    }

    /* Current state can be either ACTIVE or RECOVER */
    migrate_set_state(&mis->state, mis->state,
                      MIGRATION_STATUS_POSTCOPY_PAUSED);

    /* Notify the fault thread for the invalidated file handle */
    postcopy_fault_thread_notify(mis);

    /*
     * If network is interrupted, any temp page we received will be useless
     * because we didn't mark them as "received" in receivedmap. After a
     * proper recovery later (which will sync src dirty bitmap with receivedmap
     * on dest) these cached small pages will be resent again.
     */
    for (i = 0; i < mis->postcopy_channels; i++) {
        postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
    }

    error_report("Detected IO failure for postcopy. "
                 "Migration paused.");

    /* Block here until a recovery request wakes us up */
    while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        qemu_sem_wait(&mis->postcopy_pause_sem_dst);
    }

    trace_postcopy_pause_incoming_continued();

    return true;
}
/*
 * Main dispatch loop of the incoming migration stream: read section
 * headers from @f and hand each section to the appropriate loader until
 * EOF, LOADVM_QUIT or an error.
 *
 * Returns 0 on a clean EOF, a positive value when a command asks the
 * caller to quit the loop (LOADVM_QUIT), or a negative errno on error.
 * On error during an active RAM postcopy the function may instead pause,
 * wait for recovery and resume reading from the re-established channel.
 */
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
    uint8_t section_type;
    int ret = 0;

retry:
    while (true) {
        section_type = qemu_get_byte(f);

        /* Check errors on both the main stream and the postcopy preempt
         * channel, so a failure on either aborts the loop */
        ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
        if (ret) {
            break;
        }

        trace_qemu_loadvm_state_section(section_type);
        switch (section_type) {
        case QEMU_VM_SECTION_START:
        case QEMU_VM_SECTION_FULL:
            ret = qemu_loadvm_section_start_full(f, mis, section_type);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_SECTION_PART:
        case QEMU_VM_SECTION_END:
            ret = qemu_loadvm_section_part_end(f, mis, section_type);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_COMMAND:
            ret = loadvm_process_command(f);
            trace_qemu_loadvm_state_section_command(ret);
            /* LOADVM_QUIT is positive: propagated to the caller as-is */
            if ((ret < 0) || (ret == LOADVM_QUIT)) {
                goto out;
            }
            break;
        case QEMU_VM_EOF:
            /* This is the end of migration */
            goto out;
        default:
            error_report("Unknown savevm section type %d", section_type);
            ret = -EINVAL;
            goto out;
        }
    }

out:
    if (ret < 0) {
        qemu_file_set_error(f, ret);

        /* Cancel bitmaps incoming regardless of recovery */
        dirty_bitmap_mig_cancel_incoming();

        /*
         * If we are during an active postcopy, then we pause instead
         * of bail out to at least keep the VM's dirty data. Note
         * that POSTCOPY_INCOMING_LISTENING stage is still not enough,
         * during which we're still receiving device states and we
         * still haven't yet started the VM on destination.
         *
         * Only RAM postcopy supports recovery. Still, if RAM postcopy is
         * enabled, canceled bitmaps postcopy will not affect RAM postcopy
         * recovering.
         */
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
            /* Reset f to point to the newly created channel */
            f = mis->from_src_file;
            goto retry;
        }
    }
    return ret;
}
/*
 * Load a complete VM state from @f: header, per-device setup, the main
 * section loop, and finally the trailing VMDESC section (drained but
 * otherwise ignored).
 *
 * Returns 0 on success or a negative errno on failure. When a postcopy
 * listen thread is active, cleanup is deferred to that thread.
 */
int qemu_loadvm_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    int ret;

    if (qemu_savevm_state_blocked(&local_err)) {
        error_report_err(local_err);
        return -EINVAL;
    }

    ret = qemu_loadvm_state_header(f);
    if (ret) {
        return ret;
    }

    if (qemu_loadvm_state_setup(f) != 0) {
        return -EINVAL;
    }

    if (migrate_switchover_ack()) {
        qemu_loadvm_state_switchover_ack_needed(mis);
    }

    cpu_synchronize_all_pre_loadvm();

    ret = qemu_loadvm_state_main(f, mis);
    /* Wake anyone (e.g. the postcopy listen thread) waiting for the main
     * load to finish */
    qemu_event_set(&mis->main_thread_load_event);

    trace_qemu_loadvm_state_post_main(ret);

    if (mis->have_listen_thread) {
        /* Listen thread still going, can't clean up yet */
        return ret;
    }

    if (ret == 0) {
        ret = qemu_file_get_error(f);
    }

    /*
     * Try to read in the VMDESC section as well, so that dumping tools that
     * intercept our migration stream have the chance to see it.
     */

    /* We've got to be careful; if we don't read the data and just shut the fd
     * then the sender can error if we close while it's still sending.
     * We also mustn't read data that isn't there; some transports (RDMA)
     * will stall waiting for that data when the source has already closed.
     */
    if (ret == 0 && should_send_vmdesc()) {
        uint8_t *buf;
        uint32_t size;
        uint8_t section_type = qemu_get_byte(f);

        if (section_type != QEMU_VM_VMDESCRIPTION) {
            error_report("Expected vmdescription section, but got %d",
                         section_type);
            /*
             * It doesn't seem worth failing at this point since
             * we apparently have an otherwise valid VM state
             */
        } else {
            /* Drain the JSON description in 4KiB chunks; the content is
             * not interpreted here */
            buf = g_malloc(0x1000);
            size = qemu_get_be32(f);
            while (size > 0) {
                uint32_t read_chunk = MIN(size, 0x1000);
                qemu_get_buffer(f, buf, read_chunk);
                size -= read_chunk;
            }
            g_free(buf);
        }
    }

    qemu_loadvm_state_cleanup();
    cpu_synchronize_all_post_init();

    return ret;
}
  2582. int qemu_load_device_state(QEMUFile *f)
  2583. {
  2584. MigrationIncomingState *mis = migration_incoming_get_current();
  2585. int ret;
  2586. /* Load QEMU_VM_SECTION_FULL section */
  2587. ret = qemu_loadvm_state_main(f, mis);
  2588. if (ret < 0) {
  2589. error_report("Failed to load device state: %d", ret);
  2590. return ret;
  2591. }
  2592. cpu_synchronize_all_post_init();
  2593. return 0;
  2594. }
  2595. int qemu_loadvm_approve_switchover(void)
  2596. {
  2597. MigrationIncomingState *mis = migration_incoming_get_current();
  2598. if (!mis->switchover_ack_pending_num) {
  2599. return -EINVAL;
  2600. }
  2601. mis->switchover_ack_pending_num--;
  2602. trace_loadvm_approve_switchover(mis->switchover_ack_pending_num);
  2603. if (mis->switchover_ack_pending_num) {
  2604. return 0;
  2605. }
  2606. return migrate_send_rp_switchover_ack(mis);
  2607. }
/*
 * Create an internal snapshot of the VM and its block devices.
 *
 * @name: snapshot tag, or NULL to auto-generate a timestamp-based one
 * @overwrite: delete an existing snapshot of the same name first
 * @vmstate: node name to store the VM state in, or NULL for the default
 * @has_devices/@devices: optional explicit list of devices to snapshot
 * @errp: error destination
 *
 * The VM is stopped for the duration and restarted afterwards if it was
 * running. Returns true on success, false on failure (with *errp set).
 */
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
                   bool has_devices, strList *devices, Error **errp)
{
    BlockDriverState *bs;
    QEMUSnapshotInfo sn1, *sn = &sn1;
    int ret = -1, ret2;
    QEMUFile *f;
    int saved_vm_running;
    uint64_t vm_state_size;
    g_autoptr(GDateTime) now = g_date_time_new_now_local();

    GLOBAL_STATE_CODE();

    if (migration_is_blocked(errp)) {
        return false;
    }

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow making snapshot "
                   "right now. Try once more later.");
        return false;
    }

    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }

    /* Delete old snapshots of the same name */
    if (name) {
        if (overwrite) {
            if (bdrv_all_delete_snapshot(name, has_devices,
                                         devices, errp) < 0) {
                return false;
            }
        } else {
            /* Without overwrite, an existing snapshot of this name is
             * an error rather than being replaced */
            ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp);
            if (ret2 < 0) {
                return false;
            }
            if (ret2 == 1) {
                error_setg(errp,
                           "Snapshot '%s' already exists in one or more devices",
                           name);
                return false;
            }
        }
    }

    bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
    if (bs == NULL) {
        return false;
    }

    /* Stop the VM and quiesce block I/O before writing the state */
    saved_vm_running = runstate_is_running();

    global_state_store();
    vm_stop(RUN_STATE_SAVE_VM);

    bdrv_drain_all_begin();

    memset(sn, 0, sizeof(*sn));

    /* fill auxiliary fields */
    sn->date_sec = g_date_time_to_unix(now);
    sn->date_nsec = g_date_time_get_microsecond(now) * 1000;
    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    if (replay_mode != REPLAY_MODE_NONE) {
        sn->icount = replay_get_current_icount();
    } else {
        sn->icount = -1ULL;
    }

    if (name) {
        pstrcpy(sn->name, sizeof(sn->name), name);
    } else {
        /* No tag given: derive one from the current local time */
        g_autofree char *autoname = g_date_time_format(now, "vm-%Y%m%d%H%M%S");
        pstrcpy(sn->name, sizeof(sn->name), autoname);
    }

    /* save the VM state */
    f = qemu_fopen_bdrv(bs, 1);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto the_end;
    }
    ret = qemu_savevm_state(f, errp);
    vm_state_size = qemu_file_transferred(f);
    /* fclose may also fail (flush); capture its result separately */
    ret2 = qemu_fclose(f);
    if (ret < 0) {
        goto the_end;
    }
    if (ret2 < 0) {
        ret = ret2;
        goto the_end;
    }

    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
                                   has_devices, devices, errp);
    if (ret < 0) {
        /* Roll back any per-device snapshots that did get created */
        bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL);
        goto the_end;
    }

    ret = 0;

 the_end:
    bdrv_drain_all_end();
    if (saved_vm_running) {
        vm_start();
    }
    return ret == 0;
}
/*
 * QMP xen-save-devices-state: write the device state (RAM is handled by
 * the Xen toolstack separately) to @filename.
 *
 * @has_live/@live: whether this is part of a live migration; defaults to
 * true for compatibility with older Xen toolstacks.
 */
void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
                                Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int saved_vm_running;
    int ret;

    if (!has_live) {
        /* live default to true so old version of Xen tool stack can have a
         * successful live migration */
        live = true;
    }

    /* Stop the VM while the state is written; restarted at the_end */
    saved_vm_running = runstate_is_running();
    vm_stop(RUN_STATE_SAVE_VM);
    global_state_store_running();

    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC,
                                    0660, errp);
    if (!ioc) {
        goto the_end;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
    f = qemu_file_new_output(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));
    ret = qemu_save_device_state(f);
    if (ret < 0 || qemu_fclose(f) < 0) {
        error_setg(errp, QERR_IO_ERROR);
    } else {
        /* libxl calls the QMP command "stop" before calling
         * "xen-save-devices-state" and in case of migration failure, libxl
         * would call "cont".
         * So call bdrv_inactivate_all (release locks) here to let the other
         * side of the migration take control of the images.
         */
        if (live && !saved_vm_running) {
            ret = bdrv_inactivate_all();
            if (ret) {
                error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
                           __func__, ret);
            }
        }
    }

 the_end:
    if (saved_vm_running) {
        vm_start();
    }
}
  2750. void qmp_xen_load_devices_state(const char *filename, Error **errp)
  2751. {
  2752. QEMUFile *f;
  2753. QIOChannelFile *ioc;
  2754. int ret;
  2755. /* Guest must be paused before loading the device state; the RAM state
  2756. * will already have been loaded by xc
  2757. */
  2758. if (runstate_is_running()) {
  2759. error_setg(errp, "Cannot update device state while vm is running");
  2760. return;
  2761. }
  2762. vm_stop(RUN_STATE_RESTORE_VM);
  2763. ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
  2764. if (!ioc) {
  2765. return;
  2766. }
  2767. qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
  2768. f = qemu_file_new_input(QIO_CHANNEL(ioc));
  2769. object_unref(OBJECT(ioc));
  2770. ret = qemu_loadvm_state(f);
  2771. qemu_fclose(f);
  2772. if (ret < 0) {
  2773. error_setg(errp, QERR_IO_ERROR);
  2774. }
  2775. migration_incoming_state_destroy();
  2776. }
/*
 * Revert the VM and its block devices to the internal snapshot @name.
 *
 * @vmstate: node name holding the VM state, or NULL for the default
 * @has_devices/@devices: optional explicit device list
 * @errp: error destination
 *
 * Returns true on success, false on failure (with *errp set). The caller
 * is responsible for having stopped the VM beforehand.
 */
bool load_snapshot(const char *name, const char *vmstate,
                   bool has_devices, strList *devices, Error **errp)
{
    BlockDriverState *bs_vm_state;
    QEMUSnapshotInfo sn;
    QEMUFile *f;
    int ret;
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }
    ret = bdrv_all_has_snapshot(name, has_devices, devices, errp);
    if (ret < 0) {
        return false;
    }
    if (ret == 0) {
        error_setg(errp, "Snapshot '%s' does not exist in one or more devices",
                   name);
        return false;
    }

    bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
    if (!bs_vm_state) {
        return false;
    }

    /* Don't even try to load empty VM states */
    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
    if (ret < 0) {
        return false;
    } else if (sn.vm_state_size == 0) {
        error_setg(errp, "This is a disk-only snapshot. Revert to it "
                   " offline using qemu-img");
        return false;
    }

    /*
     * Flush the record/replay queue. Now the VM state is going
     * to change. Therefore we don't need to preserve its consistency
     */
    replay_flush_events();

    /* Flush all IO requests so they don't interfere with the new state. */
    bdrv_drain_all_begin();

    ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp);
    if (ret < 0) {
        goto err_drain;
    }

    /* restore the VM state */
    f = qemu_fopen_bdrv(bs_vm_state, 0);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto err_drain;
    }

    /* Reset the devices before feeding them the saved state */
    qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
    mis->from_src_file = f;

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        ret = -EINVAL;
        goto err_drain;
    }
    ret = qemu_loadvm_state(f);
    /* Tears down incoming migration state, including closing f */
    migration_incoming_state_destroy();

    bdrv_drain_all_end();

    if (ret < 0) {
        error_setg(errp, "Error %d while loading VM state", ret);
        return false;
    }

    return true;

err_drain:
    bdrv_drain_all_end();
    return false;
}
  2845. bool delete_snapshot(const char *name, bool has_devices,
  2846. strList *devices, Error **errp)
  2847. {
  2848. if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
  2849. return false;
  2850. }
  2851. if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) {
  2852. return false;
  2853. }
  2854. return true;
  2855. }
  2856. void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
  2857. {
  2858. qemu_ram_set_idstr(mr->ram_block,
  2859. memory_region_name(mr), dev);
  2860. qemu_ram_set_migratable(mr->ram_block);
  2861. }
  2862. void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
  2863. {
  2864. qemu_ram_unset_idstr(mr->ram_block);
  2865. qemu_ram_unset_migratable(mr->ram_block);
  2866. }
/* Register a RAM region that is not owned by any particular device. */
void vmstate_register_ram_global(MemoryRegion *mr)
{
    vmstate_register_ram(mr, NULL);
}
  2871. bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
  2872. {
  2873. /* check needed if --only-migratable is specified */
  2874. if (!only_migratable) {
  2875. return true;
  2876. }
  2877. return !(vmsd && vmsd->unmigratable);
  2878. }
/* State shared between a snapshot QMP command, its Job and the bottom
 * half that performs the actual work in the main loop. */
typedef struct SnapshotJob {
    Job common;        /* base Job; recovered via container_of() */
    char *tag;         /* snapshot tag (owned) */
    char *vmstate;     /* vmstate node name, or NULL (owned) */
    strList *devices;  /* devices to include (owned) */
    Coroutine *co;     /* job coroutine, woken when the BH completes */
    Error **errp;      /* error destination passed to the worker */
    bool ret;          /* worker result: true on success */
} SnapshotJob;
  2888. static void qmp_snapshot_job_free(SnapshotJob *s)
  2889. {
  2890. g_free(s->tag);
  2891. g_free(s->vmstate);
  2892. qapi_free_strList(s->devices);
  2893. }
  2894. static void snapshot_load_job_bh(void *opaque)
  2895. {
  2896. Job *job = opaque;
  2897. SnapshotJob *s = container_of(job, SnapshotJob, common);
  2898. int orig_vm_running;
  2899. job_progress_set_remaining(&s->common, 1);
  2900. orig_vm_running = runstate_is_running();
  2901. vm_stop(RUN_STATE_RESTORE_VM);
  2902. s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
  2903. if (s->ret && orig_vm_running) {
  2904. vm_start();
  2905. }
  2906. job_progress_update(&s->common, 1);
  2907. qmp_snapshot_job_free(s);
  2908. aio_co_wake(s->co);
  2909. }
  2910. static void snapshot_save_job_bh(void *opaque)
  2911. {
  2912. Job *job = opaque;
  2913. SnapshotJob *s = container_of(job, SnapshotJob, common);
  2914. job_progress_set_remaining(&s->common, 1);
  2915. s->ret = save_snapshot(s->tag, false, s->vmstate,
  2916. true, s->devices, s->errp);
  2917. job_progress_update(&s->common, 1);
  2918. qmp_snapshot_job_free(s);
  2919. aio_co_wake(s->co);
  2920. }
  2921. static void snapshot_delete_job_bh(void *opaque)
  2922. {
  2923. Job *job = opaque;
  2924. SnapshotJob *s = container_of(job, SnapshotJob, common);
  2925. job_progress_set_remaining(&s->common, 1);
  2926. s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
  2927. job_progress_update(&s->common, 1);
  2928. qmp_snapshot_job_free(s);
  2929. aio_co_wake(s->co);
  2930. }
  2931. static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
  2932. {
  2933. SnapshotJob *s = container_of(job, SnapshotJob, common);
  2934. s->errp = errp;
  2935. s->co = qemu_coroutine_self();
  2936. aio_bh_schedule_oneshot(qemu_get_aio_context(),
  2937. snapshot_save_job_bh, job);
  2938. qemu_coroutine_yield();
  2939. return s->ret ? 0 : -1;
  2940. }
  2941. static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
  2942. {
  2943. SnapshotJob *s = container_of(job, SnapshotJob, common);
  2944. s->errp = errp;
  2945. s->co = qemu_coroutine_self();
  2946. aio_bh_schedule_oneshot(qemu_get_aio_context(),
  2947. snapshot_load_job_bh, job);
  2948. qemu_coroutine_yield();
  2949. return s->ret ? 0 : -1;
  2950. }
  2951. static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
  2952. {
  2953. SnapshotJob *s = container_of(job, SnapshotJob, common);
  2954. s->errp = errp;
  2955. s->co = qemu_coroutine_self();
  2956. aio_bh_schedule_oneshot(qemu_get_aio_context(),
  2957. snapshot_delete_job_bh, job);
  2958. qemu_coroutine_yield();
  2959. return s->ret ? 0 : -1;
  2960. }
/* Job driver for the snapshot-load QMP command */
static const JobDriver snapshot_load_job_driver = {
    .instance_size = sizeof(SnapshotJob),
    .job_type      = JOB_TYPE_SNAPSHOT_LOAD,
    .run           = snapshot_load_job_run,
};

/* Job driver for the snapshot-save QMP command */
static const JobDriver snapshot_save_job_driver = {
    .instance_size = sizeof(SnapshotJob),
    .job_type      = JOB_TYPE_SNAPSHOT_SAVE,
    .run           = snapshot_save_job_run,
};

/* Job driver for the snapshot-delete QMP command */
static const JobDriver snapshot_delete_job_driver = {
    .instance_size = sizeof(SnapshotJob),
    .job_type      = JOB_TYPE_SNAPSHOT_DELETE,
    .run           = snapshot_delete_job_run,
};
  2976. void qmp_snapshot_save(const char *job_id,
  2977. const char *tag,
  2978. const char *vmstate,
  2979. strList *devices,
  2980. Error **errp)
  2981. {
  2982. SnapshotJob *s;
  2983. s = job_create(job_id, &snapshot_save_job_driver, NULL,
  2984. qemu_get_aio_context(), JOB_MANUAL_DISMISS,
  2985. NULL, NULL, errp);
  2986. if (!s) {
  2987. return;
  2988. }
  2989. s->tag = g_strdup(tag);
  2990. s->vmstate = g_strdup(vmstate);
  2991. s->devices = QAPI_CLONE(strList, devices);
  2992. job_start(&s->common);
  2993. }
  2994. void qmp_snapshot_load(const char *job_id,
  2995. const char *tag,
  2996. const char *vmstate,
  2997. strList *devices,
  2998. Error **errp)
  2999. {
  3000. SnapshotJob *s;
  3001. s = job_create(job_id, &snapshot_load_job_driver, NULL,
  3002. qemu_get_aio_context(), JOB_MANUAL_DISMISS,
  3003. NULL, NULL, errp);
  3004. if (!s) {
  3005. return;
  3006. }
  3007. s->tag = g_strdup(tag);
  3008. s->vmstate = g_strdup(vmstate);
  3009. s->devices = QAPI_CLONE(strList, devices);
  3010. job_start(&s->common);
  3011. }
  3012. void qmp_snapshot_delete(const char *job_id,
  3013. const char *tag,
  3014. strList *devices,
  3015. Error **errp)
  3016. {
  3017. SnapshotJob *s;
  3018. s = job_create(job_id, &snapshot_delete_job_driver, NULL,
  3019. qemu_get_aio_context(), JOB_MANUAL_DISMISS,
  3020. NULL, NULL, errp);
  3021. if (!s) {
  3022. return;
  3023. }
  3024. s->tag = g_strdup(tag);
  3025. s->devices = QAPI_CLONE(strList, devices);
  3026. job_start(&s->common);
  3027. }