/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "hw/boards.h"
#include "net/net.h"
#include "migration.h"
#include "migration/snapshot.h"
#include "migration/vmstate.h"
#include "migration/misc.h"
#include "migration/register.h"
#include "migration/global_state.h"
#include "ram.h"
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "savevm.h"
#include "postcopy-ram.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "block/snapshot.h"
#include "qemu/cutils.h"
#include "io/channel-buffer.h"
#include "io/channel-file.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/xen.h"
#include "qjson.h"
#include "migration/colo.h"
#include "qemu/bitmap.h"
#include "net/announce.h"

const unsigned int postcopy_ram_discard_version = 0;

/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
    MIG_CMD_INVALID = 0,       /* Must be 0 */
    MIG_CMD_OPEN_RETURN_PATH,  /* Tell the dest to open the Return path */
    MIG_CMD_PING,              /* Request a PONG on the RP */

    MIG_CMD_POSTCOPY_ADVISE,       /* Prior to any page transfers, just
                                      warn we might want to do PC */
    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
                                      pages as it's running. */
    MIG_CMD_POSTCOPY_RUN,          /* Start execution */

    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
                                      were previously sent during
                                      precopy but are dirty. */
    MIG_CMD_PACKAGED,          /* Send a wrapped stream within this stream */
    MIG_CMD_ENABLE_COLO,       /* Enable COLO */
    MIG_CMD_POSTCOPY_RESUME,   /* resume postcopy on dest */
    MIG_CMD_RECV_BITMAP,       /* Request for recved bitmap on dst */
    MIG_CMD_MAX
};

#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
static struct mig_cmd_args {
    ssize_t len; /* -1 = variable */
    const char *name;
} mig_cmd_args[] = {
    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = -1, .name = "POSTCOPY_ADVISE" },
    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
    [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
};

/* Note for MIG_CMD_POSTCOPY_ADVISE:
 * The format of the arguments depends on the postcopy mode:
 * - postcopy RAM only
 *     uint64_t host page size
 *     uint64_t target page size
 *
 * - postcopy RAM and postcopy dirty bitmaps
 *     format is the same as for postcopy RAM only
 *
 * - postcopy dirty bitmaps only
 *     Nothing. Command length field is 0.
 *
 * Be careful: adding a new postcopy entity with some other parameters should
 * not break format self-description ability. A good way is to introduce some
 * generic extendable format with an exception for the two old entities.
 */

/***********************************************************/
/* savevm/loadvm support */
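
/*
 * These QEMUFileOps adapt a QEMUFile to the block layer's VM state area
 * (bdrv_writev_vmstate()/bdrv_load_vmstate()), which is how internal
 * snapshots store the machine state alongside the disk data, e.g. inside
 * a qcow2 image.
 */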

static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                   int64_t pos, Error **errp)
{
    int ret;
    QEMUIOVector qiov;

    qemu_iovec_init_external(&qiov, iov, iovcnt);
    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return qiov.size;
}

static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                size_t size, Error **errp)
{
    return bdrv_load_vmstate(opaque, buf, pos, size);
}

static int bdrv_fclose(void *opaque, Error **errp)
{
    return bdrv_flush(opaque);
}

static const QEMUFileOps bdrv_read_ops = {
    .get_buffer = block_get_buffer,
    .close      = bdrv_fclose
};

static const QEMUFileOps bdrv_write_ops = {
    .writev_buffer = block_writev_buffer,
    .close         = bdrv_fclose
};

static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
    if (is_writable) {
        return qemu_fopen_ops(bs, &bdrv_write_ops);
    }
    return qemu_fopen_ops(bs, &bdrv_read_ops);
}

/* QEMUFile timer support.
 * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
 */
void timer_put(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = timer_expire_time_ns(ts);
    qemu_put_be64(f, expire_time);
}

void timer_get(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = qemu_get_be64(f);
    if (expire_time != -1) {
        timer_mod_ns(ts, expire_time);
    } else {
        timer_del(ts);
    }
}

/* VMState timer support.
 * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
 */
static int get_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field)
{
    QEMUTimer *v = pv;
    timer_get(f, v);
    return 0;
}

static int put_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field, QJSON *vmdesc)
{
    QEMUTimer *v = pv;
    timer_put(f, v);
    return 0;
}

const VMStateInfo vmstate_info_timer = {
    .name = "timer",
    .get  = get_timer,
    .put  = put_timer,
};
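
/*
 * Every device/subsystem that takes part in migration is represented by one
 * SaveStateEntry on the savevm_state.handlers list.  idstr plus instance_id
 * identify a section in the stream; 'ops' is used by old-style/iterative
 * handlers (e.g. RAM) and 'vmsd' by VMState-described devices.  CompatEntry
 * records the id a device had before the "path/name" idstr scheme, so that
 * streams from older QEMU versions can still be matched up.
 */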

typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;

typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    char idstr[256];
    uint32_t instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    const SaveVMHandlers *ops;
    const VMStateDescription *vmsd;
    void *opaque;
    CompatEntry *compat;
    int is_ram;
} SaveStateEntry;

typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
    int global_section_id;
    uint32_t len;
    const char *name;
    uint32_t target_page_bits;
    uint32_t caps_count;
    MigrationCapability *capabilities;
    QemuUUID uuid;
} SaveState;

static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
    .global_section_id = 0,
};

static bool should_validate_capability(int capability)
{
    assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
    /* Validate only new capabilities to keep compatibility. */
    switch (capability) {
    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
        return true;
    default:
        return false;
    }
}

static uint32_t get_validatable_capabilities_count(void)
{
    MigrationState *s = migrate_get_current();
    uint32_t result = 0;
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
            result++;
        }
    }
    return result;
}

static int configuration_pre_save(void *opaque)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
    MigrationState *s = migrate_get_current();
    int i, j;

    state->len = strlen(current_name);
    state->name = current_name;
    state->target_page_bits = qemu_target_page_bits();

    state->caps_count = get_validatable_capabilities_count();
    state->capabilities = g_renew(MigrationCapability, state->capabilities,
                                  state->caps_count);
    for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
            state->capabilities[j++] = i;
        }
    }
    state->uuid = qemu_uuid;

    return 0;
}

static int configuration_pre_load(void *opaque)
{
    SaveState *state = opaque;

    /* If there is no target-page-bits subsection it means the source
     * predates the variable-target-page-bits support and is using the
     * minimum possible value for this CPU.
     */
    state->target_page_bits = qemu_target_page_bits_min();
    return 0;
}

static bool configuration_validate_capabilities(SaveState *state)
{
    bool ret = true;
    MigrationState *s = migrate_get_current();
    unsigned long *source_caps_bm;
    int i;

    source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
    for (i = 0; i < state->caps_count; i++) {
        MigrationCapability capability = state->capabilities[i];
        set_bit(capability, source_caps_bm);
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        bool source_state, target_state;
        if (!should_validate_capability(i)) {
            continue;
        }
        source_state = test_bit(i, source_caps_bm);
        target_state = s->enabled_capabilities[i];
        if (source_state != target_state) {
            error_report("Capability %s is %s, but received capability is %s",
                         MigrationCapability_str(i),
                         target_state ? "on" : "off",
                         source_state ? "on" : "off");
            ret = false;
            /* Don't break here to report all failed capabilities */
        }
    }

    g_free(source_caps_bm);
    return ret;
}

static int configuration_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;

    if (strncmp(state->name, current_name, state->len) != 0) {
        error_report("Machine type received is '%.*s' and local is '%s'",
                     (int) state->len, state->name, current_name);
        return -EINVAL;
    }

    if (state->target_page_bits != qemu_target_page_bits()) {
        error_report("Received TARGET_PAGE_BITS is %d but local is %d",
                     state->target_page_bits, qemu_target_page_bits());
        return -EINVAL;
    }

    if (!configuration_validate_capabilities(state)) {
        return -EINVAL;
    }

    return 0;
}
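
/*
 * Capabilities are serialized by name rather than by numeric value, as a
 * one-byte length followed by the string, so the two ends can differ in
 * their enum numbering and still understand each other.
 */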

static int get_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    MigrationCapability *capability = pv;
    char capability_str[UINT8_MAX + 1];
    uint8_t len;
    int i;

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)capability_str, len);
    capability_str[len] = '\0';
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (!strcmp(MigrationCapability_str(i), capability_str)) {
            *capability = i;
            return 0;
        }
    }
    error_report("Received unknown capability %s", capability_str);
    return -EINVAL;
}

static int put_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, QJSON *vmdesc)
{
    MigrationCapability *capability = pv;
    const char *capability_str = MigrationCapability_str(*capability);
    size_t len = strlen(capability_str);

    assert(len <= UINT8_MAX);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)capability_str, len);
    return 0;
}

static const VMStateInfo vmstate_info_capability = {
    .name = "capability",
    .get  = get_capability,
    .put  = put_capability,
};

/* The target-page-bits subsection is present only if the
 * target page size is not the same as the default (ie the
 * minimum page size for a variable-page-size guest CPU).
 * If it is present then it contains the actual target page
 * bits for the machine, and migration will fail if the
 * two ends don't agree about it.
 */
static bool vmstate_target_page_bits_needed(void *opaque)
{
    return qemu_target_page_bits()
        > qemu_target_page_bits_min();
}

static const VMStateDescription vmstate_target_page_bits = {
    .name = "configuration/target-page-bits",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_target_page_bits_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(target_page_bits, SaveState),
        VMSTATE_END_OF_LIST()
    }
};

static bool vmstate_capabilites_needed(void *opaque)
{
    return get_validatable_capabilities_count() > 0;
}

static const VMStateDescription vmstate_capabilites = {
    .name = "configuration/capabilities",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_capabilites_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_V(caps_count, SaveState, 1),
        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                    vmstate_info_capability,
                                    MigrationCapability),
        VMSTATE_END_OF_LIST()
    }
};

static bool vmstate_uuid_needed(void *opaque)
{
    return qemu_uuid_set && migrate_validate_uuid();
}

static int vmstate_uuid_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    char uuid_src[UUID_FMT_LEN + 1];
    char uuid_dst[UUID_FMT_LEN + 1];

    if (!qemu_uuid_set) {
        /*
         * This is only a warning because the user might not know the UUID
         * in some cases, e.g. when loading an old snapshot.
         */
        qemu_uuid_unparse(&state->uuid, uuid_src);
        warn_report("UUID is received %s, but local uuid isn't set",
                    uuid_src);
        return 0;
    }

    if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
        qemu_uuid_unparse(&state->uuid, uuid_src);
        qemu_uuid_unparse(&qemu_uuid, uuid_dst);
        error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_uuid = {
    .name = "configuration/uuid",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_uuid_needed,
    .post_load = vmstate_uuid_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .pre_load = configuration_pre_load,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_target_page_bits,
        &vmstate_capabilites,
        &vmstate_uuid,
        NULL
    }
};
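
/*
 * The dump_vmstate_* helpers below emit a JSON description of every
 * migratable device (fields, versions, subsections).  They back the
 * -dump-vmstate command line option, whose output can be compared between
 * QEMU versions to spot stream-compatibility breakage.
 */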

static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection);

static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
                              int indent)
{
    fprintf(out_file, "%*s{\n", indent, "");
    indent += 2;
    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            field->version_id);
    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
            field->field_exists ? "true" : "false");
    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
    if (field->vmsd != NULL) {
        fprintf(out_file, ",\n");
        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_vmstate_vmss(FILE *out_file,
                              const VMStateDescription **subsection,
                              int indent)
{
    if (*subsection != NULL) {
        dump_vmstate_vmsd(out_file, *subsection, indent, true);
    }
}

static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection)
{
    if (is_subsection) {
        fprintf(out_file, "%*s{\n", indent, "");
    } else {
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
    }
    indent += 2;
    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            vmsd->version_id);
    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
            vmsd->minimum_version_id);
    if (vmsd->fields != NULL) {
        const VMStateField *field = vmsd->fields;
        bool first;

        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
        first = true;
        while (field->name != NULL) {
            if (field->flags & VMS_MUST_EXIST) {
                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
                field++;
                continue;
            }
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmsf(out_file, field, indent + 2);
            field++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    if (vmsd->subsections != NULL) {
        const VMStateDescription **subsection = vmsd->subsections;
        bool first;

        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
        first = true;
        while (*subsection != NULL) {
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmss(out_file, subsection, indent + 2);
            subsection++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_machine_type(FILE *out_file)
{
    MachineClass *mc;

    mc = MACHINE_GET_CLASS(current_machine);

    fprintf(out_file, "  \"vmschkmachine\": {\n");
    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
    fprintf(out_file, "  },\n");
}

void dump_vmstate_json_to_file(FILE *out_file)
{
    GSList *list, *elt;
    bool first;

    fprintf(out_file, "{\n");
    dump_machine_type(out_file);

    first = true;
    list = object_class_get_list(TYPE_DEVICE, true);
    for (elt = list; elt; elt = elt->next) {
        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
                                             TYPE_DEVICE);
        const char *name;
        int indent = 2;

        if (!dc->vmsd) {
            continue;
        }

        if (!first) {
            fprintf(out_file, ",\n");
        }
        name = object_class_get_name(OBJECT_CLASS(dc));
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
        indent += 2;
        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
                dc->vmsd->version_id);
        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
                dc->vmsd->minimum_version_id);

        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);

        fprintf(out_file, "\n%*s}", indent - 2, "");
        first = false;
    }
    fprintf(out_file, "\n}\n");
    fclose(out_file);
    g_slist_free(list);
}
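
/*
 * Instance ids disambiguate multiple entries that share an idstr (e.g. two
 * identical NICs).  When the caller passes VMSTATE_INSTANCE_ID_ANY, the
 * next free id is allocated as one more than the highest id currently
 * registered under that idstr.
 */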

static uint32_t calculate_new_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    uint32_t instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (strcmp(idstr, se->idstr) == 0
            && instance_id <= se->instance_id) {
            instance_id = se->instance_id + 1;
        }
    }
    /* Make sure we never wrap around unnoticed */
    assert(instance_id != VMSTATE_INSTANCE_ID_ANY);
    return instance_id;
}

static int calculate_compat_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    int instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->compat) {
            continue;
        }

        if (strcmp(idstr, se->compat->idstr) == 0
            && instance_id <= se->compat->instance_id) {
            instance_id = se->compat->instance_id + 1;
        }
    }
    return instance_id;
}

static inline MigrationPriority save_state_priority(SaveStateEntry *se)
{
    if (se->vmsd) {
        return se->vmsd->priority;
    }
    return MIG_PRI_DEFAULT;
}
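
/*
 * The handlers list is kept sorted by descending priority; entries of equal
 * priority stay in registration order.  handler_pri_head[] caches the first
 * entry of each priority band so insertion doesn't have to walk the whole
 * list: a new entry is placed just before the head of the next lower
 * non-empty band, or at the tail if no lower band exists.
 */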

static void savevm_state_handler_insert(SaveStateEntry *nse)
{
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *se;
    int i;

    assert(priority <= MIG_PRI_MAX);

    for (i = priority - 1; i >= 0; i--) {
        se = savevm_state.handler_pri_head[i];
        if (se != NULL) {
            assert(save_state_priority(se) < priority);
            break;
        }
    }

    if (i >= 0) {
        QTAILQ_INSERT_BEFORE(se, nse, entry);
    } else {
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }

    if (savevm_state.handler_pri_head[priority] == NULL) {
        savevm_state.handler_pri_head[priority] = nse;
    }
}

static void savevm_state_handler_remove(SaveStateEntry *se)
{
    SaveStateEntry *next;
    MigrationPriority priority = save_state_priority(se);

    if (se == savevm_state.handler_pri_head[priority]) {
        next = QTAILQ_NEXT(se, entry);
        if (next != NULL && save_state_priority(next) == priority) {
            savevm_state.handler_pri_head[priority] = next;
        } else {
            savevm_state.handler_pri_head[priority] = NULL;
        }
    }
    QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
}

/* TODO: Individual devices generally have very little idea about the rest
   of the system, so instance_id should be removed/replaced.
   Meanwhile pass -1 as instance_id if you do not already have a clearly
   distinguishing id for all instances of your device class. */
int register_savevm_live(const char *idstr,
                         uint32_t instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque)
{
    SaveStateEntry *se;

    se = g_new0(SaveStateEntry, 1);
    se->version_id = version_id;
    se->section_id = savevm_state.global_section_id++;
    se->ops = ops;
    se->opaque = opaque;
    se->vmsd = NULL;
    /* if this is a live savevm handler then set is_ram */
    if (ops->save_setup != NULL) {
        se->is_ram = 1;
    }

    pstrcat(se->idstr, sizeof(se->idstr), idstr);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}
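
/*
 * Illustrative only: an iterative subsystem would typically register itself
 * once at startup, roughly like this ("my_handlers" and "my_state" are
 * hypothetical names, not part of this file):
 *
 *     static SaveVMHandlers my_handlers = {
 *         .save_setup                 = my_save_setup,
 *         .save_live_iterate          = my_save_live_iterate,
 *         .save_live_complete_precopy = my_save_complete,
 *         .load_state                 = my_load_state,
 *     };
 *
 *     register_savevm_live("my-subsystem", VMSTATE_INSTANCE_ID_ANY, 1,
 *                          &my_handlers, &my_state);
 */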

void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
{
    SaveStateEntry *se, *new_se;
    char id[256] = "";

    if (obj) {
        char *oid = vmstate_if_get_id(obj);
        if (oid) {
            pstrcpy(id, sizeof(id), oid);
            pstrcat(id, sizeof(id), "/");
            g_free(oid);
        }
    }
    pstrcat(id, sizeof(id), idstr);

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}

int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *opaque, int alias_id,
                                   int required_for_version,
                                   Error **errp)
{
    SaveStateEntry *se;

    /* If this triggers, alias support can be dropped for the vmsd. */
    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);

    se = g_new0(SaveStateEntry, 1);
    se->version_id = vmsd->version_id;
    se->section_id = savevm_state.global_section_id++;
    se->opaque = opaque;
    se->vmsd = vmsd;
    se->alias_id = alias_id;

    if (obj) {
        char *id = vmstate_if_get_id(obj);
        if (id) {
            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
                sizeof(se->idstr)) {
                error_setg(errp, "Path too long for VMState (%s)", id);
                g_free(id);
                g_free(se);

                return -1;
            }
            g_free(id);

            se->compat = g_new0(CompatEntry, 1);
            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
            se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
                calculate_compat_instance_id(vmsd->name) : instance_id;
            instance_id = VMSTATE_INSTANCE_ID_ANY;
        }
    }
    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}

void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
                        void *opaque)
{
    SaveStateEntry *se, *new_se;

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (se->vmsd == vmsd && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}

static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
{
    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {         /* Old style */
        return se->ops->load_state(f, se->opaque, se->load_version_id);
    }
    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
}

static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                   QJSON *vmdesc)
{
    int64_t old_offset, size;

    old_offset = qemu_ftell_fast(f);
    se->ops->save_state(f, se->opaque);
    size = qemu_ftell_fast(f) - old_offset;

    if (vmdesc) {
        json_prop_int(vmdesc, "size", size);
        json_start_array(vmdesc, "fields");
        json_start_object(vmdesc, NULL);
        json_prop_str(vmdesc, "name", "data");
        json_prop_int(vmdesc, "size", size);
        json_prop_str(vmdesc, "type", "buffer");
        json_end_object(vmdesc);
        json_end_array(vmdesc);
    }
}

static int vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
{
    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {
        vmstate_save_old_style(f, se, vmdesc);
        return 0;
    }
    return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
}

/*
 * Write the header for a device section (QEMU_VM_SECTION START/END/PART/FULL)
 */
static void save_section_header(QEMUFile *f, SaveStateEntry *se,
                                uint8_t section_type)
{
    qemu_put_byte(f, section_type);
    qemu_put_be32(f, se->section_id);

    if (section_type == QEMU_VM_SECTION_FULL ||
        section_type == QEMU_VM_SECTION_START) {
        /* ID string */
        size_t len = strlen(se->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)se->idstr, len);

        qemu_put_be32(f, se->instance_id);
        qemu_put_be32(f, se->version_id);
    }
}

/*
 * Write a footer onto device sections so that misformatted device
 * sections can be caught on load.
 */
static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    if (migrate_get_current()->send_section_footer) {
        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
        qemu_put_be32(f, se->section_id);
    }
}

/**
 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 *                           command and associated data.
 *
 * @f: File to send command on
 * @command: Command type to send
 * @len: Length of associated data
 * @data: Data associated with command.
 */
static void qemu_savevm_command_send(QEMUFile *f,
                                     enum qemu_vm_cmd command,
                                     uint16_t len,
                                     uint8_t *data)
{
    trace_savevm_command_send(command, len);
    qemu_put_byte(f, QEMU_VM_COMMAND);
    qemu_put_be16(f, (uint16_t)command);
    qemu_put_be16(f, len);
    qemu_put_buffer(f, data, len);
    qemu_fflush(f);
}

void qemu_savevm_send_colo_enable(QEMUFile *f)
{
    trace_savevm_send_colo_enable();
    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
}

void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
{
    uint32_t buf;

    trace_savevm_send_ping(value);
    buf = cpu_to_be32(value);
    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
}

void qemu_savevm_send_open_return_path(QEMUFile *f)
{
    trace_savevm_send_open_return_path();
    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}

/* We have a buffer of data to send; we don't want that all to be loaded
 * by the command itself, so the command contains just the length of the
 * extra buffer that we then send straight after it.
 * TODO: Must be a better way to organise that
 *
 * Returns:
 *    0 on success
 *    -ve on error
 */
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
{
    uint32_t tmp;

    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("%s: Unreasonably large packaged state: %zu",
                     __func__, len);
        return -1;
    }

    tmp = cpu_to_be32(len);

    trace_qemu_savevm_send_packaged();
    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);

    qemu_put_buffer(f, buf, len);

    return 0;
}

/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
    if (migrate_postcopy_ram()) {
        uint64_t tmp[2];
        tmp[0] = cpu_to_be64(ram_pagesize_summary());
        tmp[1] = cpu_to_be64(qemu_target_page_size());
        trace_qemu_savevm_send_postcopy_advise();
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
                                 16, (uint8_t *)tmp);
    } else {
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
    }
}

/* Sent prior to starting the destination running in postcopy, discards pages
 * that have already been sent but redirtied on the source.
 * CMD_POSTCOPY_RAM_DISCARD consists of:
 *      byte   version (0)
 *      byte   Length of name field (not including 0)
 *  n x byte   RAM block name
 *      byte   0 terminator (just for safety)
 *  n x        Byte ranges within the named RAMBlock
 *      be64   Start of the range
 *      be64   Length
 *
 *  name:  RAMBlock name that these entries are part of
 *  len: Number of page entries
 *  start_list: 'len' addresses
 *  length_list: 'len' addresses
 *
 */
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
                                           uint16_t len,
                                           uint64_t *start_list,
                                           uint64_t *length_list)
{
    uint8_t *buf;
    uint16_t tmplen;
    uint16_t t;
    size_t name_len = strlen(name);

    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
    assert(name_len < 256);
    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
    buf[0] = postcopy_ram_discard_version;
    buf[1] = name_len;
    memcpy(buf + 2, name, name_len);
    tmplen = 2 + name_len;
    buf[tmplen++] = '\0';

    for (t = 0; t < len; t++) {
        stq_be_p(buf + tmplen, start_list[t]);
        tmplen += 8;
        stq_be_p(buf + tmplen, length_list[t]);
        tmplen += 8;
    }
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
    g_free(buf);
}

/* Get the destination into a state where it can receive postcopy data. */
void qemu_savevm_send_postcopy_listen(QEMUFile *f)
{
    trace_savevm_send_postcopy_listen();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
}

/* Kick the destination into running */
void qemu_savevm_send_postcopy_run(QEMUFile *f)
{
    trace_savevm_send_postcopy_run();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
}

void qemu_savevm_send_postcopy_resume(QEMUFile *f)
{
    trace_savevm_send_postcopy_resume();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
}

void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
{
    size_t len;
    char buf[256];

    trace_savevm_send_recv_bitmap(block_name);

    buf[0] = len = strlen(block_name);
    memcpy(buf + 1, block_name, len);

    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
}

bool qemu_savevm_state_blocked(Error **errp)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            error_setg(errp, "State blocked by non-migratable device '%s'",
                       se->idstr);
            return true;
        }
    }
    return false;
}

void qemu_savevm_state_header(QEMUFile *f)
{
    trace_savevm_state_header();
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    if (migrate_get_current()->send_configuration) {
        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
    }
}

bool qemu_savevm_state_guest_unplug_pending(void)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->dev_unplug_pending &&
            se->vmsd->dev_unplug_pending(se->opaque)) {
            return true;
        }
    }

    return false;
}

void qemu_savevm_state_setup(QEMUFile *f)
{
    SaveStateEntry *se;
    Error *local_err = NULL;
    int ret;

    trace_savevm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        save_section_header(f, se, QEMU_VM_SECTION_START);

        ret = se->ops->save_setup(f, se->opaque);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            break;
        }
    }

    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
        error_report_err(local_err);
    }
}

int qemu_savevm_state_resume_prepare(MigrationState *s)
{
    SaveStateEntry *se;
    int ret;

    trace_savevm_state_resume_prepare();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->resume_prepare) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        ret = se->ops->resume_prepare(s, se->opaque);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * This function has three return values:
 *   negative: there was an error, and we have -errno.
 *   0 : We haven't finished, the caller has to call it again.
 *   1 : We have finished, we can go to the complete phase.
 */
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
    SaveStateEntry *se;
    int ret = 1;

    trace_savevm_state_iterate();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_iterate) {
            continue;
        }
        if (se->ops->is_active &&
            !se->ops->is_active(se->opaque)) {
            continue;
        }
        if (se->ops->is_active_iterate &&
            !se->ops->is_active_iterate(se->opaque)) {
            continue;
        }
        /*
         * In the postcopy phase, any device that doesn't know how to
         * do postcopy should have saved its state in the _complete
         * call that's already run; it might get confused if we call
         * iterate afterwards.
         */
        if (postcopy &&
            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
            continue;
        }
        if (qemu_file_rate_limit(f)) {
            return 0;
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_PART);

        ret = se->ops->save_live_iterate(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);

        if (ret < 0) {
            error_report("failed to save SaveStateEntry with id(name): %d(%s)",
                         se->section_id, se->idstr);
            qemu_file_set_error(f, ret);
        }
        if (ret <= 0) {
            /* Do not proceed to the next vmstate before this one reported
               completion of the current stage. This serializes the migration
               and reduces the probability that a faster changing state is
               synchronized over and over again. */
            break;
        }
    }
    return ret;
}
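
/*
 * The vmdesc JSON self-description is appended after the device state on
 * precopy completion so that tools such as scripts/analyze-migration.py can
 * parse the stream; it is skipped in postcopy (where the stream is still
 * live) and when the machine sets suppress-vmdesc.
 */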

static bool should_send_vmdesc(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    bool in_postcopy = migration_in_postcopy();
    return !machine->suppress_vmdesc && !in_postcopy;
}

/*
 * Calls the save_live_complete_postcopy methods
 * causing the last few pages to be sent immediately and doing any associated
 * cleanup.
 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
 * all the other devices, but that happens at the point we switch to postcopy.
 */
void qemu_savevm_state_complete_postcopy(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_complete_postcopy) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);
        /* Section type */
        qemu_put_byte(f, QEMU_VM_SECTION_END);
        qemu_put_be32(f, se->section_id);

        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return;
        }
    }

    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);
}

static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops ||
            (in_postcopy && se->ops->has_postcopy &&
             se->ops->has_postcopy(se->opaque)) ||
            !se->ops->save_live_complete_precopy) {
            continue;
        }

        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_END);

        ret = se->ops->save_live_complete_precopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return -1;
        }
    }

    return 0;
}

static
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                    bool in_postcopy,
                                                    bool inactivate_disks)
{
    g_autoptr(QJSON) vmdesc = NULL;
    int vmdesc_len;
    SaveStateEntry *se;
    int ret;

    vmdesc = qjson_new();
    json_prop_int(vmdesc, "page_size", qemu_target_page_size());
    json_start_array(vmdesc, "devices");

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
            continue;
        }
        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
            trace_savevm_section_skip(se->idstr, se->section_id);
            continue;
        }

        trace_savevm_section_start(se->idstr, se->section_id);

        json_start_object(vmdesc, NULL);
        json_prop_str(vmdesc, "name", se->idstr);
        json_prop_int(vmdesc, "instance_id", se->instance_id);

        save_section_header(f, se, QEMU_VM_SECTION_FULL);
        ret = vmstate_save(f, se, vmdesc);
        if (ret) {
            qemu_file_set_error(f, ret);
            return ret;
        }
        trace_savevm_section_end(se->idstr, se->section_id, 0);
        save_section_footer(f, se);

        json_end_object(vmdesc);
    }

    if (inactivate_disks) {
        /* Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_invalidate_cache_all() on the other end won't fail. */
        ret = bdrv_inactivate_all();
        if (ret) {
            error_report("%s: bdrv_inactivate_all() failed (%d)",
                         __func__, ret);
            qemu_file_set_error(f, ret);
            return ret;
        }
    }
    if (!in_postcopy) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    json_end_array(vmdesc);
    qjson_finish(vmdesc);
    vmdesc_len = strlen(qjson_get_str(vmdesc));

    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, vmdesc_len);
        qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
    }

    return 0;
}

int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                       bool inactivate_disks)
{
    int ret;
    Error *local_err = NULL;
    bool in_postcopy = migration_in_postcopy();

    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_complete_precopy();

    cpu_synchronize_all_states();

    if (!in_postcopy || iterable_only) {
        ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
        if (ret) {
            return ret;
        }
    }

    if (iterable_only) {
        goto flush;
    }

    ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
                                                          inactivate_disks);
    if (ret) {
        return ret;
    }

flush:
    qemu_fflush(f);
    return 0;
}

/* Give an estimate of the amount left to be transferred,
 * the result is split into the amount for units that can and
 * for units that can't do postcopy.
 */
void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
                               uint64_t *res_precopy_only,
                               uint64_t *res_compatible,
                               uint64_t *res_postcopy_only)
{
    SaveStateEntry *se;

    *res_precopy_only = 0;
    *res_compatible = 0;
    *res_postcopy_only = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_pending) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        se->ops->save_live_pending(f, se->opaque, threshold_size,
                                   res_precopy_only, res_compatible,
                                   res_postcopy_only);
    }
}

void qemu_savevm_state_cleanup(void)
{
    SaveStateEntry *se;
    Error *local_err = NULL;

    if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_cleanup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->ops && se->ops->save_cleanup) {
            se->ops->save_cleanup(se->opaque);
        }
    }
}
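
/*
 * Synchronously save the complete VM state into f. This is the snapshot
 * path (see save_snapshot() below), so it refuses to run while a live
 * migration is active and drives the setup/iterate/complete loop itself.
 */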
static int qemu_savevm_state(QEMUFile *f, Error **errp)
{
    int ret;
    MigrationState *ms = migrate_get_current();
    MigrationStatus status;

    if (migration_is_running(ms->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return -EINVAL;
    }

    if (migrate_use_block()) {
        error_setg(errp, "Block migration and snapshots are incompatible");
        return -EINVAL;
    }

    migrate_init(ms);
    memset(&ram_counters, 0, sizeof(ram_counters));
    ms->to_dst_file = f;

    qemu_mutex_unlock_iothread();
    qemu_savevm_state_header(f);
    qemu_savevm_state_setup(f);
    qemu_mutex_lock_iothread();

    while (qemu_file_get_error(f) == 0) {
        if (qemu_savevm_state_iterate(f, false) > 0) {
            break;
        }
    }

    ret = qemu_file_get_error(f);
    if (ret == 0) {
        qemu_savevm_state_complete_precopy(f, false, false);
        ret = qemu_file_get_error(f);
    }
    qemu_savevm_state_cleanup();

    if (ret != 0) {
        error_setg_errno(errp, -ret, "Error while writing VM state");
        status = MIGRATION_STATUS_FAILED;
    } else {
        status = MIGRATION_STATUS_COMPLETED;
    }
    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);

    /* f is an outer parameter; it must not stay in the global migration
     * state after this function finishes */
    ms->to_dst_file = NULL;
    return ret;
}

void qemu_savevm_live_state(QEMUFile *f)
{
    /* save QEMU_VM_SECTION_END section */
    qemu_savevm_state_complete_precopy(f, true, false);
    qemu_put_byte(f, QEMU_VM_EOF);
}
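
/*
 * Save only the device (non-RAM) state as full sections, terminated by
 * QEMU_VM_EOF. Used by the Xen and COLO paths, where RAM is transferred
 * by other means; in COLO state the file magic/version header is skipped.
 */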
int qemu_save_device_state(QEMUFile *f)
{
    SaveStateEntry *se;

    if (!migration_in_colo_state()) {
        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
    }
    cpu_synchronize_all_states();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        int ret;

        if (se->is_ram) {
            continue;
        }
        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
            continue;
        }
        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
            continue;
        }

        save_section_header(f, se, QEMU_VM_SECTION_FULL);

        ret = vmstate_save(f, se, NULL);
        if (ret) {
            return ret;
        }

        save_section_footer(f, se);
    }

    qemu_put_byte(f, QEMU_VM_EOF);

    return qemu_file_get_error(f);
}
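
/*
 * Look up the SaveStateEntry for an incoming section by its ID string and
 * instance ID, also matching the alias ID and the legacy "compat" ID so
 * that streams from older QEMU versions still find their handler.
 */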
static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!strcmp(se->idstr, idstr) &&
            (instance_id == se->instance_id ||
             instance_id == se->alias_id))
            return se;
        /* Migrating from an older version? */
        if (strstr(se->idstr, idstr) && se->compat) {
            if (!strcmp(se->compat->idstr, idstr) &&
                (instance_id == se->compat->instance_id ||
                 instance_id == se->alias_id))
                return se;
        }
    }
    return NULL;
}

enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT = 1,
};

/* ------ incoming postcopy messages ------ */
/* 'advise' arrives before any transfers just to tell us that a postcopy
 * *might* happen - it might be skipped if precopy transferred everything
 * quickly.
 */
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
                                         uint16_t len)
{
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_advise();
    if (ps != POSTCOPY_INCOMING_NONE) {
        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
        return -1;
    }

    switch (len) {
    case 0:
        if (migrate_postcopy_ram()) {
            error_report("RAM postcopy is enabled but have 0 byte advise");
            return -EINVAL;
        }
        return 0;
    case 8 + 8:
        if (!migrate_postcopy_ram()) {
            error_report("RAM postcopy is disabled but have 16 byte advise");
            return -EINVAL;
        }
        break;
    default:
        error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
        return -EINVAL;
    }

    if (!postcopy_ram_supported_by_host(mis)) {
        postcopy_state_set(POSTCOPY_INCOMING_NONE);
        return -1;
    }

    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
    local_pagesize_summary = ram_pagesize_summary();

    if (remote_pagesize_summary != local_pagesize_summary) {
        /*
         * This detects two potential causes of mismatch:
         *   a) A mismatch in host page sizes
         *      Some combinations of mismatch are probably possible but it
         *      gets a bit more complicated. In particular we need to place
         *      whole host pages on the dest at once, and we need to ensure
         *      that we handle dirtying to make sure we never end up sending
         *      part of a hostpage on its own.
         *   b) The use of different huge page sizes on source/destination
         *      A finer-grained test is performed during RAM block migration,
         *      but this test here causes a nice early clear failure, and
         *      also fails when passed to an older qemu that doesn't
         *      do huge pages.
         */
        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
                     " d=%" PRIx64 ")",
                     remote_pagesize_summary, local_pagesize_summary);
        return -1;
    }

    remote_tps = qemu_get_be64(mis->from_src_file);
    if (remote_tps != qemu_target_page_size()) {
        /*
         * Again, some differences could be dealt with, but for now keep it
         * simple.
         */
        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
                     (int)remote_tps, qemu_target_page_size());
        return -1;
    }

    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    if (ram_postcopy_incoming_init(mis)) {
        return -1;
    }

    return 0;
}

/* After postcopy we will be told to throw some pages away since they're
 * dirty and will have to be demand fetched. Must happen before CPU is
 * started.
 * There can be 0..many of these messages, each encoding multiple pages.
 */
static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
                                              uint16_t len)
{
    int tmp;
    char ramid[256];
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_ram_handle_discard();

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
        /* 1st discard */
        tmp = postcopy_ram_prepare_discard(mis);
        if (tmp) {
            return tmp;
        }
        break;

    case POSTCOPY_INCOMING_DISCARD:
        /* Expected state */
        break;

    default:
        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
                     ps);
        return -1;
    }
    /* We're expecting a
     *    Version (0)
     *    a RAM ID string (length byte, name, 0 term)
     *    then at least 1 16 byte chunk
     */
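
    /*
     * Illustrative (hypothetical) payload for a RAMBlock named "pc.ram"
     * with a single discarded 2 MiB range starting at 1 MiB:
     *   00                          version
     *   06 "pc.ram" 00              length byte, name, nul terminator
     *   00 00 00 00 00 10 00 00     start_addr   (be64)
     *   00 00 00 00 00 20 00 00     block_length (be64)
     */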
    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }

    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != postcopy_ram_discard_version) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
        return -1;
    }

    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
        return -1;
    }
    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != 0) {
        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
        return -1;
    }

    len -= 3 + strlen(ramid);
    if (len % 16) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }
    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
    while (len) {
        uint64_t start_addr, block_length;
        start_addr = qemu_get_be64(mis->from_src_file);
        block_length = qemu_get_be64(mis->from_src_file);
        len -= 16;
        int ret = ram_discard_range(ramid, start_addr, block_length);
        if (ret) {
            return ret;
        }
    }
    trace_loadvm_postcopy_ram_handle_discard_end();

    return 0;
}

/*
 * Triggered by a postcopy_listen command; this thread takes over reading
 * the input stream, leaving the main thread free to carry on loading the rest
 * of the device state (from RAM).
 * (TODO: This could do with being in a postcopy file - but there again it's
 * just another input loop, not that postcopy specific)
 */
static void *postcopy_ram_listen_thread(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    QEMUFile *f = mis->from_src_file;
    int load_res;
    MigrationState *migr = migrate_get_current();

    object_ref(OBJECT(migr));

    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
    qemu_sem_post(&mis->listen_thread_sem);
    trace_postcopy_ram_listen_thread_start();

    rcu_register_thread();
    /*
     * Because we're a thread and not a coroutine we can't yield
     * in qemu_file, and thus we must be blocking now.
     */
    qemu_file_set_blocking(f, true);
    load_res = qemu_loadvm_state_main(f, mis);

    /*
     * This is tricky, but, mis->from_src_file can change after it
     * returns, when postcopy recovery happened. In the future, we may
     * want a wrapper for the QEMUFile handle.
     */
    f = mis->from_src_file;

    /* And non-blocking again so we don't block in any cleanup */
    qemu_file_set_blocking(f, false);

    trace_postcopy_ram_listen_thread_exit();
    if (load_res < 0) {
        qemu_file_set_error(f, load_res);
        dirty_bitmap_mig_cancel_incoming();
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            !migrate_postcopy_ram() && migrate_dirty_bitmaps())
        {
            error_report("%s: loadvm failed during postcopy: %d. All states "
                         "are migrated except dirty bitmaps. Some dirty "
                         "bitmaps may be lost; the dirty bitmaps that were "
                         "migrated are correct and valid.",
                         __func__, load_res);
            load_res = 0; /* prevent further exit() */
        } else {
            error_report("%s: loadvm failed: %d", __func__, load_res);
            migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
        }
    }
    if (load_res >= 0) {
        /*
         * This looks good, but it's possible that the device loading in the
         * main thread hasn't finished yet, and so we might not be in 'RUN'
         * state yet; wait for the end of the main thread.
         */
        qemu_event_wait(&mis->main_thread_load_event);
    }
    postcopy_ram_incoming_cleanup(mis);

    if (load_res < 0) {
        /*
         * If something went wrong then we have a bad state so exit;
         * depending how far we got it might be possible at this point
         * to leave the guest running and fire MCEs for pages that never
         * arrived as a desperate recovery step.
         */
        rcu_unregister_thread();
        exit(EXIT_FAILURE);
    }

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    /*
     * If everything has worked fine, then the main thread has waited
     * for us to start, and we're the last use of the mis.
     * (If something broke then qemu will have to exit anyway since it's
     * got a bad migration state).
     */
    migration_incoming_state_destroy();
    qemu_loadvm_state_cleanup();

    rcu_unregister_thread();
    mis->have_listen_thread = false;
    postcopy_state_set(POSTCOPY_INCOMING_END);

    object_unref(OBJECT(migr));

    return NULL;
}

/* After this message we must be able to immediately receive postcopy data */
static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
{
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_listen();

    if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
        return -1;
    }
    if (ps == POSTCOPY_INCOMING_ADVISE) {
        /*
         * A rare case, we entered listen without having to do any discards,
         * so do the setup that's normally done at the time of the 1st
         * discard.
         */
        if (migrate_postcopy_ram()) {
            postcopy_ram_prepare_discard(mis);
        }
    }

    /*
     * Sensitise RAM - can now generate requests for blocks that don't exist
     * However, at this point the CPU shouldn't be running, and the IO
     * shouldn't be doing anything yet so don't actually expect requests
     */
    if (migrate_postcopy_ram()) {
        if (postcopy_ram_incoming_setup(mis)) {
            postcopy_ram_incoming_cleanup(mis);
            return -1;
        }
    }

    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    mis->have_listen_thread = true;
    /* Start up the listening thread and wait for it to signal ready */
    qemu_sem_init(&mis->listen_thread_sem, 0);
    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
                       postcopy_ram_listen_thread, NULL,
                       QEMU_THREAD_DETACHED);
    qemu_sem_wait(&mis->listen_thread_sem);
    qemu_sem_destroy(&mis->listen_thread_sem);

    return 0;
}

static void loadvm_postcopy_handle_run_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* TODO we should move all of this lot into postcopy_ram.c or a shared code
     * in migration.c
     */
    cpu_synchronize_all_post_init();

    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    /* Make sure all file formats flush their mutable metadata.
     * If we get an error here, just don't restart the VM yet. */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
        autostart = false;
    }

    trace_loadvm_postcopy_handle_run_cpu_sync();

    trace_loadvm_postcopy_handle_run_vmstart();

    dirty_bitmap_mig_before_vm_start();

    if (autostart) {
        /* Hold onto your hats, starting the CPU */
        vm_start();
    } else {
        /* leave it paused and let management decide when to start the CPU */
        runstate_set(RUN_STATE_PAUSED);
    }

    qemu_bh_delete(mis->bh);
}

/* After all discards we can start running and asking for pages */
static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
{
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_handle_run();
    if (ps != POSTCOPY_INCOMING_LISTENING) {
        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
        return -1;
    }

    postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
    mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
    qemu_bh_schedule(mis->bh);

    /* We need to finish reading the stream from the package
     * and also stop reading anything more from the stream that loaded the
     * package (since it's now being read by the listener thread).
     * LOADVM_QUIT will quit all the layers of nested loadvm loops.
     */
    return LOADVM_QUIT;
}

static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: illegal resume received", __func__);
        /* Don't fail the load, only for this. */
        return 0;
    }

    /*
     * This means the source VM is ready to resume the postcopy migration.
     * It's time to switch state and release the fault thread to
     * continue servicing page faults.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
    qemu_sem_post(&mis->postcopy_pause_sem_fault);

    trace_loadvm_postcopy_handle_resume();

    /* Tell source that "we are ready" */
    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);

    return 0;
}

/**
 * Immediately following this command is a blob of data containing an embedded
 * chunk of migration stream; read it and load it. The length of the chunk is
 * read off the wire as a be32, not passed as a parameter.
 *
 * @mis: Incoming state
 *
 * Returns: Negative values on error
 */
static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
{
    int ret;
    size_t length;
    QIOChannelBuffer *bioc;

    length = qemu_get_be32(mis->from_src_file);
    trace_loadvm_handle_cmd_packaged(length);

    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("Unreasonably large packaged state: %zu", length);
        return -1;
    }

    bioc = qio_channel_buffer_new(length);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
    ret = qemu_get_buffer(mis->from_src_file,
                          bioc->data,
                          length);
    if (ret != length) {
        object_unref(OBJECT(bioc));
        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
                     ret, length);
        return (ret < 0) ? ret : -EAGAIN;
    }
    bioc->usage += length;
    trace_loadvm_handle_cmd_packaged_received(ret);

    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));

    ret = qemu_loadvm_state_main(packf, mis);
    trace_loadvm_handle_cmd_packaged_main(ret);
    qemu_fclose(packf);
    object_unref(OBJECT(bioc));

    return ret;
}

/*
 * Handle a request from the source for the receive bitmap on the
 * destination. Payload format:
 *
 * len (1 byte) + ramblock_name (<255 bytes)
 */
static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
                                     uint16_t len)
{
    QEMUFile *file = mis->from_src_file;
    RAMBlock *rb;
    char block_name[256];
    size_t cnt;

    cnt = qemu_get_counted_string(file, block_name);
    if (!cnt) {
        error_report("%s: failed to read block name", __func__);
        return -EINVAL;
    }

    /* Validate before using the data */
    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    if (len != cnt + 1) {
        error_report("%s: invalid payload length (%d)", __func__, len);
        return -EINVAL;
    }

    rb = qemu_ram_block_by_name(block_name);
    if (!rb) {
        error_report("%s: block '%s' not found", __func__, block_name);
        return -EINVAL;
    }

    migrate_send_rp_recv_bitmap(mis, block_name);

    trace_loadvm_handle_recv_bitmap(block_name);

    return 0;
}

static int loadvm_process_enable_colo(MigrationIncomingState *mis)
{
    int ret = migration_incoming_enable_colo();

    if (!ret) {
        ret = colo_init_ram_cache();
        if (ret) {
            migration_incoming_disable_colo();
        }
    }
    return ret;
}

/*
 * Process an incoming 'QEMU_VM_COMMAND'
 * Returns:
 *   0            just a normal return
 *   LOADVM_QUIT  All good, but exit the loop
 *   <0           Error
 */
static int loadvm_process_command(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    uint16_t cmd;
    uint16_t len;
    uint32_t tmp32;

    cmd = qemu_get_be16(f);
    len = qemu_get_be16(f);

    /* Check validity before continuing to process the commands */
    if (qemu_file_get_error(f)) {
        return qemu_file_get_error(f);
    }

    trace_loadvm_process_command(cmd, len);
    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
        return -EINVAL;
    }

    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
        error_report("%s received with bad length - expecting %zu, got %d",
                     mig_cmd_args[cmd].name,
                     (size_t)mig_cmd_args[cmd].len, len);
        return -ERANGE;
    }

    switch (cmd) {
    case MIG_CMD_OPEN_RETURN_PATH:
        if (mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
            /* Not really a problem, so don't give up */
            return 0;
        }
        mis->to_src_file = qemu_file_get_return_path(f);
        if (!mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH failed");
            return -1;
        }
        break;

    case MIG_CMD_PING:
        tmp32 = qemu_get_be32(f);
        trace_loadvm_process_command_ping(tmp32);
        if (!mis->to_src_file) {
            error_report("CMD_PING (0x%x) received with no return path",
                         tmp32);
            return -1;
        }
        migrate_send_rp_pong(mis, tmp32);
        break;

    case MIG_CMD_PACKAGED:
        return loadvm_handle_cmd_packaged(mis);

    case MIG_CMD_POSTCOPY_ADVISE:
        return loadvm_postcopy_handle_advise(mis, len);

    case MIG_CMD_POSTCOPY_LISTEN:
        return loadvm_postcopy_handle_listen(mis);

    case MIG_CMD_POSTCOPY_RUN:
        return loadvm_postcopy_handle_run(mis);

    case MIG_CMD_POSTCOPY_RAM_DISCARD:
        return loadvm_postcopy_ram_handle_discard(mis, len);

    case MIG_CMD_POSTCOPY_RESUME:
        return loadvm_postcopy_handle_resume(mis);

    case MIG_CMD_RECV_BITMAP:
        return loadvm_handle_recv_bitmap(mis, len);

    case MIG_CMD_ENABLE_COLO:
        return loadvm_process_enable_colo(mis);
    }

    return 0;
}

/*
 * Read a footer off the wire and check that it matches the expected section
 *
 * Returns: true if the footer was good
 *          false if there is a problem (and calls error_report to say why)
 */
static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    int ret;
    uint8_t read_mark;
    uint32_t read_section_id;

    if (!migrate_get_current()->send_section_footer) {
        /* No footer to check */
        return true;
    }

    read_mark = qemu_get_byte(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Read section footer failed: %d",
                     __func__, ret);
        return false;
    }

    if (read_mark != QEMU_VM_SECTION_FOOTER) {
        error_report("Missing section footer for %s", se->idstr);
        return false;
    }

    read_section_id = qemu_get_be32(f);
    if (read_section_id != se->load_section_id) {
        error_report("Mismatched section id in footer for %s -"
                     " read 0x%x expected 0x%x",
                     se->idstr, read_section_id, se->load_section_id);
        return false;
    }

    /* All good */
    return true;
}

static int
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t instance_id, version_id, section_id;
    SaveStateEntry *se;
    char idstr[256];
    int ret;

    /* Read section start */
    section_id = qemu_get_be32(f);
    if (!qemu_get_counted_string(f, idstr)) {
        error_report("Unable to read ID string for section %u",
                     section_id);
        return -EINVAL;
    }
    instance_id = qemu_get_be32(f);
    version_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read instance/version ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
                                              instance_id, version_id);
    /* Find savevm section */
    se = find_se(idstr, instance_id);
    if (se == NULL) {
        error_report("Unknown savevm section or instance '%s' %"PRIu32". "
                     "Make sure that your current VM setup matches your "
                     "saved VM setup, including any hotplugged devices",
                     idstr, instance_id);
        return -EINVAL;
    }

    /* Validate version */
    if (version_id > se->version_id) {
        error_report("savevm: unsupported version %d for '%s' v%d",
                     version_id, idstr, se->version_id);
        return -EINVAL;
    }
    se->load_version_id = version_id;
    se->load_section_id = section_id;

    /* Validate if it is a device's state */
    if (xen_enabled() && se->is_ram) {
        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
        return -EINVAL;
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state for instance 0x%"PRIx32" of"
                     " device '%s'", instance_id, idstr);
        return ret;
    }
    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}

static int
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t section_id;
    SaveStateEntry *se;
    int ret;

    section_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read section ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_partend(section_id);
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->load_section_id == section_id) {
            break;
        }
    }
    if (se == NULL) {
        error_report("Unknown savevm section %d", section_id);
        return -EINVAL;
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state section id %d(%s)",
                     section_id, se->idstr);
        return ret;
    }
    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}
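
/*
 * Parse the stream header: QEMU_VM_FILE_MAGIC ("QEVM") and
 * QEMU_VM_FILE_VERSION as be32 values, optionally followed by a
 * QEMU_VM_CONFIGURATION section when the source sends one.
 * An illustrative header, as hex bytes:
 *   51 45 56 4d    magic
 *   00 00 00 03    version
 *   07 ...         optional configuration section
 */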
static int qemu_loadvm_state_header(QEMUFile *f)
{
    unsigned int v;
    int ret;

    v = qemu_get_be32(f);
    if (v != QEMU_VM_FILE_MAGIC) {
        error_report("Not a migration stream");
        return -EINVAL;
    }

    v = qemu_get_be32(f);
    if (v == QEMU_VM_FILE_VERSION_COMPAT) {
        error_report("SaveVM v2 format is obsolete and doesn't work anymore");
        return -ENOTSUP;
    }
    if (v != QEMU_VM_FILE_VERSION) {
        error_report("Unsupported migration stream version");
        return -ENOTSUP;
    }

    if (migrate_get_current()->send_configuration) {
        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
            error_report("Configuration section missing");
            qemu_loadvm_state_cleanup();
            return -EINVAL;
        }
        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);

        if (ret) {
            qemu_loadvm_state_cleanup();
            return ret;
        }
    }
    return 0;
}

static int qemu_loadvm_state_setup(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    trace_loadvm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->load_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        ret = se->ops->load_setup(f, se->opaque);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            error_report("Load state of device %s failed", se->idstr);
            return ret;
        }
    }
    return 0;
}

void qemu_loadvm_state_cleanup(void)
{
    SaveStateEntry *se;

    trace_loadvm_state_cleanup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->ops && se->ops->load_cleanup) {
            se->ops->load_cleanup(se->opaque);
        }
    }
}

/* Return true if we should continue the migration, or false. */
static bool postcopy_pause_incoming(MigrationIncomingState *mis)
{
    trace_postcopy_pause_incoming();

    assert(migrate_postcopy_ram());

    /* Clear the triggered bit to allow one recovery */
    mis->postcopy_recover_triggered = false;

    assert(mis->from_src_file);
    qemu_file_shutdown(mis->from_src_file);
    qemu_fclose(mis->from_src_file);
    mis->from_src_file = NULL;

    assert(mis->to_src_file);
    qemu_file_shutdown(mis->to_src_file);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_fclose(mis->to_src_file);
    mis->to_src_file = NULL;
    qemu_mutex_unlock(&mis->rp_mutex);

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_PAUSED);

    /* Notify the fault thread for the invalidated file handle */
    postcopy_fault_thread_notify(mis);

    error_report("Detected IO failure for postcopy. "
                 "Migration paused.");

    while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        qemu_sem_wait(&mis->postcopy_pause_sem_dst);
    }

    trace_postcopy_pause_incoming_continued();

    return true;
}
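
/*
 * Main incoming dispatch loop: read a section-type byte and route it to
 * the full/part section loaders or the command handler until QEMU_VM_EOF
 * or an error. If a failure happens while a RAM postcopy is running, the
 * loop pauses and retries once a recovery channel has replaced
 * mis->from_src_file, instead of failing the destination outright.
 */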
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
    uint8_t section_type;
    int ret = 0;

retry:
    while (true) {
        section_type = qemu_get_byte(f);

        if (qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
            break;
        }

        trace_qemu_loadvm_state_section(section_type);
        switch (section_type) {
        case QEMU_VM_SECTION_START:
        case QEMU_VM_SECTION_FULL:
            ret = qemu_loadvm_section_start_full(f, mis);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_SECTION_PART:
        case QEMU_VM_SECTION_END:
            ret = qemu_loadvm_section_part_end(f, mis);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_COMMAND:
            ret = loadvm_process_command(f);
            trace_qemu_loadvm_state_section_command(ret);
            if ((ret < 0) || (ret == LOADVM_QUIT)) {
                goto out;
            }
            break;
        case QEMU_VM_EOF:
            /* This is the end of migration */
            goto out;
        default:
            error_report("Unknown savevm section type %d", section_type);
            ret = -EINVAL;
            goto out;
        }
    }

out:
    if (ret < 0) {
        qemu_file_set_error(f, ret);

        /* Cancel bitmaps incoming regardless of recovery */
        dirty_bitmap_mig_cancel_incoming();

        /*
         * If we are during an active postcopy, then we pause instead of
         * bailing out, to at least keep the VM's dirty data. Note that the
         * POSTCOPY_INCOMING_LISTENING stage is still not enough, since
         * during that stage we're still receiving device states and we
         * still haven't started the VM on the destination.
         *
         * Only RAM postcopy supports recovery. Still, if RAM postcopy is
         * enabled, a cancelled bitmaps postcopy will not affect RAM
         * postcopy recovery.
         */
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
            /* Reset f to point to the newly created channel */
            f = mis->from_src_file;
            goto retry;
        }
    }
    return ret;
}
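
/*
 * Load a complete VM state stream from f: the header, then all sections
 * via qemu_loadvm_state_main(), then drain the trailing VMDESC blob so
 * that the sender never sees the channel close early.
 */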
int qemu_loadvm_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    int ret;

    if (qemu_savevm_state_blocked(&local_err)) {
        error_report_err(local_err);
        return -EINVAL;
    }

    ret = qemu_loadvm_state_header(f);
    if (ret) {
        return ret;
    }

    if (qemu_loadvm_state_setup(f) != 0) {
        return -EINVAL;
    }

    cpu_synchronize_all_pre_loadvm();

    ret = qemu_loadvm_state_main(f, mis);
    qemu_event_set(&mis->main_thread_load_event);

    trace_qemu_loadvm_state_post_main(ret);

    if (mis->have_listen_thread) {
        /* Listen thread still going, can't clean up yet */
        return ret;
    }

    if (ret == 0) {
        ret = qemu_file_get_error(f);
    }

    /*
     * Try to read in the VMDESC section as well, so that dumping tools that
     * intercept our migration stream have the chance to see it.
     */

    /* We've got to be careful; if we don't read the data and just shut the fd
     * then the sender can error if we close while it's still sending.
     * We also mustn't read data that isn't there; some transports (RDMA)
     * will stall waiting for that data when the source has already closed.
     */
    if (ret == 0 && should_send_vmdesc()) {
        uint8_t *buf;
        uint32_t size;
        uint8_t section_type = qemu_get_byte(f);

        if (section_type != QEMU_VM_VMDESCRIPTION) {
            error_report("Expected vmdescription section, but got %d",
                         section_type);
            /*
             * It doesn't seem worth failing at this point since
             * we apparently have an otherwise valid VM state
             */
        } else {
            buf = g_malloc(0x1000);
            size = qemu_get_be32(f);

            while (size > 0) {
                uint32_t read_chunk = MIN(size, 0x1000);
                qemu_get_buffer(f, buf, read_chunk);
                size -= read_chunk;
            }
            g_free(buf);
        }
    }

    qemu_loadvm_state_cleanup();
    cpu_synchronize_all_post_init();

    return ret;
}

int qemu_load_device_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    /* Load QEMU_VM_SECTION_FULL section */
    ret = qemu_loadvm_state_main(f, mis);
    if (ret < 0) {
        error_report("Failed to load device state: %d", ret);
        return ret;
    }

    cpu_synchronize_all_post_init();
    return 0;
}
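
/*
 * Create an internal snapshot called 'name' (or a generated timestamp name
 * when 'name' is NULL): stop the VM, delete any same-named snapshot, write
 * the VM state via qemu_savevm_state() into the vmstate block device, then
 * take a snapshot on every snapshottable block device.
 */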
int save_snapshot(const char *name, Error **errp)
{
    BlockDriverState *bs, *bs1;
    QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
    int ret = -1, ret2;
    QEMUFile *f;
    int saved_vm_running;
    uint64_t vm_state_size;
    qemu_timeval tv;
    struct tm tm;
    AioContext *aio_context;

    if (migration_is_blocked(errp)) {
        return ret;
    }

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow making snapshot "
                   "right now. Try once more later.");
        return ret;
    }

    if (!bdrv_all_can_snapshot(&bs)) {
        error_setg(errp, "Device '%s' is writable but does not support "
                   "snapshots", bdrv_get_device_name(bs));
        return ret;
    }

    /* Delete old snapshots of the same name */
    if (name) {
        ret = bdrv_all_delete_snapshot(name, &bs1, errp);
        if (ret < 0) {
            error_prepend(errp, "Error while deleting snapshot on device "
                          "'%s': ", bdrv_get_device_name(bs1));
            return ret;
        }
    }

    bs = bdrv_all_find_vmstate_bs();
    if (bs == NULL) {
        error_setg(errp, "No block device can accept snapshots");
        return ret;
    }
    aio_context = bdrv_get_aio_context(bs);

    saved_vm_running = runstate_is_running();

    ret = global_state_store();
    if (ret) {
        error_setg(errp, "Error saving global state");
        return ret;
    }
    vm_stop(RUN_STATE_SAVE_VM);

    bdrv_drain_all_begin();

    aio_context_acquire(aio_context);

    memset(sn, 0, sizeof(*sn));

    /* fill auxiliary fields */
    qemu_gettimeofday(&tv);
    sn->date_sec = tv.tv_sec;
    sn->date_nsec = tv.tv_usec * 1000;
    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

    if (name) {
        ret = bdrv_snapshot_find(bs, old_sn, name);
        if (ret >= 0) {
            pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
            pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
        } else {
            pstrcpy(sn->name, sizeof(sn->name), name);
        }
    } else {
        /* cast below needed for OpenBSD where tv_sec is still 'long' */
        localtime_r((const time_t *)&tv.tv_sec, &tm);
        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
    }

    /* save the VM state */
    f = qemu_fopen_bdrv(bs, 1);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto the_end;
    }
    ret = qemu_savevm_state(f, errp);
    vm_state_size = qemu_ftell(f);
    ret2 = qemu_fclose(f);
    if (ret < 0) {
        goto the_end;
    }
    if (ret2 < 0) {
        ret = ret2;
        goto the_end;
    }

    /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
     * for itself.  BDRV_POLL_WHILE() does not support nested locking because
     * it only releases the lock once.  Therefore synchronous I/O will deadlock
     * unless we release the AioContext before bdrv_all_create_snapshot().
     */
    aio_context_release(aio_context);
    aio_context = NULL;

    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
    if (ret < 0) {
        error_setg(errp, "Error while creating snapshot on '%s'",
                   bdrv_get_device_name(bs));
        goto the_end;
    }

    ret = 0;

 the_end:
    if (aio_context) {
        aio_context_release(aio_context);
    }

    bdrv_drain_all_end();

    if (saved_vm_running) {
        vm_start();
    }
    return ret;
}
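
/*
 * QMP xen-save-devices-state: write only the device state to 'filename',
 * for a Xen toolstack that transfers RAM itself. On a live migration where
 * the toolstack has already stopped the VM, the block devices are also
 * inactivated so the other side can take control of the images.
 */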
void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
                                Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int saved_vm_running;
    int ret;

    if (!has_live) {
        /* live defaults to true so an old version of the Xen tool stack can
         * have a successful live migration */
        live = true;
    }

    saved_vm_running = runstate_is_running();
    vm_stop(RUN_STATE_SAVE_VM);
    global_state_store_running();

    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp);
    if (!ioc) {
        goto the_end;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));
    ret = qemu_save_device_state(f);
    if (ret < 0 || qemu_fclose(f) < 0) {
        error_setg(errp, QERR_IO_ERROR);
    } else {
        /* libxl calls the QMP command "stop" before calling
         * "xen-save-devices-state" and in case of migration failure, libxl
         * would call "cont".
         * So call bdrv_inactivate_all (release locks) here to let the other
         * side of the migration take control of the images.
         */
        if (live && !saved_vm_running) {
            ret = bdrv_inactivate_all();
            if (ret) {
                error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
                           __func__, ret);
            }
        }
    }

 the_end:
    if (saved_vm_running) {
        vm_start();
    }
}

void qmp_xen_load_devices_state(const char *filename, Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int ret;

    /* Guest must be paused before loading the device state; the RAM state
     * will already have been loaded by xc
     */
    if (runstate_is_running()) {
        error_setg(errp, "Cannot update device state while vm is running");
        return;
    }
    vm_stop(RUN_STATE_RESTORE_VM);

    ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
    if (!ioc) {
        return;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
    f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));

    ret = qemu_loadvm_state(f);
    qemu_fclose(f);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
    }
    migration_incoming_state_destroy();
}
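
/*
 * Revert every snapshottable block device to the internal snapshot 'name'
 * and restore the saved VM state from the vmstate block device. Callers
 * are expected to stop the VM beforehand and restart it afterwards.
 */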
int load_snapshot(const char *name, Error **errp)
{
    BlockDriverState *bs, *bs_vm_state;
    QEMUSnapshotInfo sn;
    QEMUFile *f;
    int ret;
    AioContext *aio_context;
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow loading snapshot "
                   "right now. Try once more later.");
        return -EINVAL;
    }

    if (!bdrv_all_can_snapshot(&bs)) {
        error_setg(errp,
                   "Device '%s' is writable but does not support snapshots",
                   bdrv_get_device_name(bs));
        return -ENOTSUP;
    }
    ret = bdrv_all_find_snapshot(name, &bs);
    if (ret < 0) {
        error_setg(errp,
                   "Device '%s' does not have the requested snapshot '%s'",
                   bdrv_get_device_name(bs), name);
        return ret;
    }

    bs_vm_state = bdrv_all_find_vmstate_bs();
    if (!bs_vm_state) {
        error_setg(errp, "No block device supports snapshots");
        return -ENOTSUP;
    }
    aio_context = bdrv_get_aio_context(bs_vm_state);

    /* Don't even try to load empty VM states */
    aio_context_acquire(aio_context);
    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
    aio_context_release(aio_context);
    if (ret < 0) {
        return ret;
    } else if (sn.vm_state_size == 0) {
        error_setg(errp, "This is a disk-only snapshot. Revert to it "
                   " offline using qemu-img");
        return -EINVAL;
    }

    /* Flush all IO requests so they don't interfere with the new state.  */
    bdrv_drain_all_begin();

    ret = bdrv_all_goto_snapshot(name, &bs, errp);
    if (ret < 0) {
        error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
                      name, bdrv_get_device_name(bs));
        goto err_drain;
    }

    /* restore the VM state */
    f = qemu_fopen_bdrv(bs_vm_state, 0);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        ret = -EINVAL;
        goto err_drain;
    }

    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
    mis->from_src_file = f;

    aio_context_acquire(aio_context);
    ret = qemu_loadvm_state(f);
    migration_incoming_state_destroy();
    aio_context_release(aio_context);

    bdrv_drain_all_end();

    if (ret < 0) {
        error_setg(errp, "Error %d while loading VM state", ret);
        return ret;
    }

    return 0;

err_drain:
    bdrv_drain_all_end();
    return ret;
}

void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
{
    qemu_ram_set_idstr(mr->ram_block,
                       memory_region_name(mr), dev);
    qemu_ram_set_migratable(mr->ram_block);
}

void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
{
    qemu_ram_unset_idstr(mr->ram_block);
    qemu_ram_unset_migratable(mr->ram_block);
}

void vmstate_register_ram_global(MemoryRegion *mr)
{
    vmstate_register_ram(mr, NULL);
}

bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
{
    /* check needed if --only-migratable is specified */
    if (!only_migratable) {
        return true;
    }

    return !(vmsd && vmsd->unmigratable);
}