savevm.c

/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "hw/boards.h"
#include "hw/xen/xen.h"
#include "net/net.h"
#include "migration.h"
#include "migration/snapshot.h"
#include "migration/vmstate.h"
#include "migration/misc.h"
#include "migration/register.h"
#include "migration/global_state.h"
#include "ram.h"
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "savevm.h"
#include "postcopy-ram.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "block/snapshot.h"
#include "qemu/cutils.h"
#include "io/channel-buffer.h"
#include "io/channel-file.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "qjson.h"
#include "migration/colo.h"
#include "qemu/bitmap.h"
#include "net/announce.h"

const unsigned int postcopy_ram_discard_version = 0;

/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
    MIG_CMD_INVALID = 0,          /* Must be 0 */
    MIG_CMD_OPEN_RETURN_PATH,     /* Tell the dest to open the Return path */
    MIG_CMD_PING,                 /* Request a PONG on the RP */

    MIG_CMD_POSTCOPY_ADVISE,      /* Prior to any page transfers, just
                                     warn we might want to do PC */
    MIG_CMD_POSTCOPY_LISTEN,      /* Start listening for incoming
                                     pages as it's running. */
    MIG_CMD_POSTCOPY_RUN,         /* Start execution */

    MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that
                                     were previously sent during
                                     precopy but are dirty. */
    MIG_CMD_PACKAGED,             /* Send a wrapped stream within this stream */
    MIG_CMD_ENABLE_COLO,          /* Enable COLO */
    MIG_CMD_POSTCOPY_RESUME,      /* resume postcopy on dest */
    MIG_CMD_RECV_BITMAP,          /* Request for recved bitmap on dst */
    MIG_CMD_MAX
};

#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
static struct mig_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} mig_cmd_args[] = {
    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = -1, .name = "POSTCOPY_ADVISE" },
    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
    [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
};

/* Note for MIG_CMD_POSTCOPY_ADVISE:
 * The format of the arguments depends on the postcopy mode:
 * - postcopy RAM only
 *   uint64_t host page size
 *   uint64_t target page size
 *
 * - postcopy RAM and postcopy dirty bitmaps
 *   format is the same as for postcopy RAM only
 *
 * - postcopy dirty bitmaps only
 *   Nothing. Command length field is 0.
 *
 * Be careful: adding a new postcopy entity with some other parameters should
 * not break format self-description ability. A good way is to introduce some
 * generic extendable format with an exception for the two old entities.
 */

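/*
 * Illustrative example (not normative): with postcopy RAM enabled on a
 * host and target that both use 4 KiB pages, the ADVISE payload is two
 * big-endian 64-bit values, e.g. 0x0000000000001000 0x0000000000001000,
 * matching what qemu_savevm_send_postcopy_advise() below sends.
 */
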
/***********************************************************/
/* savevm/loadvm support */

static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                   int64_t pos, Error **errp)
{
    int ret;
    QEMUIOVector qiov;

    qemu_iovec_init_external(&qiov, iov, iovcnt);
    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
    if (ret < 0) {
        return ret;
    }

    return qiov.size;
}

static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                size_t size, Error **errp)
{
    return bdrv_load_vmstate(opaque, buf, pos, size);
}

static int bdrv_fclose(void *opaque, Error **errp)
{
    return bdrv_flush(opaque);
}

static const QEMUFileOps bdrv_read_ops = {
    .get_buffer = block_get_buffer,
    .close      = bdrv_fclose
};

static const QEMUFileOps bdrv_write_ops = {
    .writev_buffer = block_writev_buffer,
    .close         = bdrv_fclose
};

static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
    if (is_writable) {
        return qemu_fopen_ops(bs, &bdrv_write_ops);
    }
    return qemu_fopen_ops(bs, &bdrv_read_ops);
}

/* QEMUFile timer support.
 * Not in qemu-file.c so as not to add qemu-timer.c as a dependency
 * of qemu-file.c.
 */

void timer_put(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = timer_expire_time_ns(ts);
    qemu_put_be64(f, expire_time);
}

void timer_get(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = qemu_get_be64(f);
    if (expire_time != -1) {
        timer_mod_ns(ts, expire_time);
    } else {
        timer_del(ts);
    }
}

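/*
 * On the wire a timer is a single big-endian 64-bit expiry time in
 * nanoseconds; the all-ones value (-1) marks a timer that is not
 * pending, which is why timer_get() above calls timer_del() for it.
 */
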
/* VMState timer support.
 * Not in vmstate.c so as not to add qemu-timer.c as a dependency
 * of vmstate.c.
 */

static int get_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field)
{
    QEMUTimer *v = pv;
    timer_get(f, v);
    return 0;
}

static int put_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field, QJSON *vmdesc)
{
    QEMUTimer *v = pv;
    timer_put(f, v);
    return 0;
}

const VMStateInfo vmstate_info_timer = {
    .name = "timer",
    .get  = get_timer,
    .put  = put_timer,
};


typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;

typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    char idstr[256];
    int instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    const SaveVMHandlers *ops;
    const VMStateDescription *vmsd;
    void *opaque;
    CompatEntry *compat;
    int is_ram;
} SaveStateEntry;

typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    int global_section_id;
    uint32_t len;
    const char *name;
    uint32_t target_page_bits;
    uint32_t caps_count;
    MigrationCapability *capabilities;
    QemuUUID uuid;
} SaveState;

static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .global_section_id = 0,
};

static bool should_validate_capability(int capability)
{
    assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
    /* Validate only new capabilities to keep compatibility. */
    switch (capability) {
    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
        return true;
    default:
        return false;
    }
}

static uint32_t get_validatable_capabilities_count(void)
{
    MigrationState *s = migrate_get_current();
    uint32_t result = 0;
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
            result++;
        }
    }
    return result;
}

static int configuration_pre_save(void *opaque)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
    MigrationState *s = migrate_get_current();
    int i, j;

    state->len = strlen(current_name);
    state->name = current_name;
    state->target_page_bits = qemu_target_page_bits();

    state->caps_count = get_validatable_capabilities_count();
    state->capabilities = g_renew(MigrationCapability, state->capabilities,
                                  state->caps_count);
    for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
            state->capabilities[j++] = i;
        }
    }
    state->uuid = qemu_uuid;

    return 0;
}

static int configuration_pre_load(void *opaque)
{
    SaveState *state = opaque;

    /* If there is no target-page-bits subsection it means the source
     * predates the variable-target-page-bits support and is using the
     * minimum possible value for this CPU.
     */
    state->target_page_bits = qemu_target_page_bits_min();
    return 0;
}

static bool configuration_validate_capabilities(SaveState *state)
{
    bool ret = true;
    MigrationState *s = migrate_get_current();
    unsigned long *source_caps_bm;
    int i;

    source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
    for (i = 0; i < state->caps_count; i++) {
        MigrationCapability capability = state->capabilities[i];
        set_bit(capability, source_caps_bm);
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        bool source_state, target_state;
        if (!should_validate_capability(i)) {
            continue;
        }
        source_state = test_bit(i, source_caps_bm);
        target_state = s->enabled_capabilities[i];
        if (source_state != target_state) {
            error_report("Capability %s is %s, but received capability is %s",
                         MigrationCapability_str(i),
                         target_state ? "on" : "off",
                         source_state ? "on" : "off");
            ret = false;
            /* Don't break here to report all failed capabilities */
        }
    }

    g_free(source_caps_bm);
    return ret;
}

static int configuration_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;

    if (strncmp(state->name, current_name, state->len) != 0) {
        error_report("Machine type received is '%.*s' and local is '%s'",
                     (int) state->len, state->name, current_name);
        return -EINVAL;
    }

    if (state->target_page_bits != qemu_target_page_bits()) {
        error_report("Received TARGET_PAGE_BITS is %d but local is %d",
                     state->target_page_bits, qemu_target_page_bits());
        return -EINVAL;
    }

    if (!configuration_validate_capabilities(state)) {
        return -EINVAL;
    }

    return 0;
}

static int get_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    MigrationCapability *capability = pv;
    char capability_str[UINT8_MAX + 1];
    uint8_t len;
    int i;

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)capability_str, len);
    capability_str[len] = '\0';
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (!strcmp(MigrationCapability_str(i), capability_str)) {
            *capability = i;
            return 0;
        }
    }
    error_report("Received unknown capability %s", capability_str);
    return -EINVAL;
}

static int put_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, QJSON *vmdesc)
{
    MigrationCapability *capability = pv;
    const char *capability_str = MigrationCapability_str(*capability);
    size_t len = strlen(capability_str);

    assert(len <= UINT8_MAX);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)capability_str, len);
    return 0;
}

static const VMStateInfo vmstate_info_capability = {
    .name = "capability",
    .get  = get_capability,
    .put  = put_capability,
};

/* The target-page-bits subsection is present only if the
 * target page size is not the same as the default (ie the
 * minimum page size for a variable-page-size guest CPU).
 * If it is present then it contains the actual target page
 * bits for the machine, and migration will fail if the
 * two ends don't agree about it.
 */
static bool vmstate_target_page_bits_needed(void *opaque)
{
    return qemu_target_page_bits()
        > qemu_target_page_bits_min();
}

static const VMStateDescription vmstate_target_page_bits = {
    .name = "configuration/target-page-bits",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_target_page_bits_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(target_page_bits, SaveState),
        VMSTATE_END_OF_LIST()
    }
};

static bool vmstate_capabilites_needed(void *opaque)
{
    return get_validatable_capabilities_count() > 0;
}

static const VMStateDescription vmstate_capabilites = {
    .name = "configuration/capabilities",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_capabilites_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_V(caps_count, SaveState, 1),
        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                    vmstate_info_capability,
                                    MigrationCapability),
        VMSTATE_END_OF_LIST()
    }
};

static bool vmstate_uuid_needed(void *opaque)
{
    return qemu_uuid_set && migrate_validate_uuid();
}

static int vmstate_uuid_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    char uuid_src[UUID_FMT_LEN + 1];
    char uuid_dst[UUID_FMT_LEN + 1];

    if (!qemu_uuid_set) {
        /*
         * This is only a warning because the user might not know the
         * UUID in some cases, e.g. when loading an old snapshot.
         */
        qemu_uuid_unparse(&state->uuid, uuid_src);
        warn_report("UUID is received %s, but local uuid isn't set",
                     uuid_src);
        return 0;
    }

    if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
        qemu_uuid_unparse(&state->uuid, uuid_src);
        qemu_uuid_unparse(&qemu_uuid, uuid_dst);
        error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_uuid = {
    .name = "configuration/uuid",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_uuid_needed,
    .post_load = vmstate_uuid_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .pre_load = configuration_pre_load,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_target_page_bits,
        &vmstate_capabilites,
        &vmstate_uuid,
        NULL
    }
};

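/*
 * Summary note: the "configuration" section therefore carries the machine
 * type name as a length-prefixed string (len + name above), plus optional
 * subsections for the target page bits, the validatable capabilities and
 * the VM UUID, each one sent only when its .needed callback returns true.
 */
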
static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection);

static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
                              int indent)
{
    fprintf(out_file, "%*s{\n", indent, "");
    indent += 2;
    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            field->version_id);
    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
            field->field_exists ? "true" : "false");
    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
    if (field->vmsd != NULL) {
        fprintf(out_file, ",\n");
        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_vmstate_vmss(FILE *out_file,
                              const VMStateDescription **subsection,
                              int indent)
{
    if (*subsection != NULL) {
        dump_vmstate_vmsd(out_file, *subsection, indent, true);
    }
}

static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection)
{
    if (is_subsection) {
        fprintf(out_file, "%*s{\n", indent, "");
    } else {
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
    }
    indent += 2;
    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            vmsd->version_id);
    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
            vmsd->minimum_version_id);
    if (vmsd->fields != NULL) {
        const VMStateField *field = vmsd->fields;
        bool first;

        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
        first = true;
        while (field->name != NULL) {
            if (field->flags & VMS_MUST_EXIST) {
                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
                field++;
                continue;
            }
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmsf(out_file, field, indent + 2);
            field++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    if (vmsd->subsections != NULL) {
        const VMStateDescription **subsection = vmsd->subsections;
        bool first;

        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
        first = true;
        while (*subsection != NULL) {
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmss(out_file, subsection, indent + 2);
            subsection++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_machine_type(FILE *out_file)
{
    MachineClass *mc;

    mc = MACHINE_GET_CLASS(current_machine);

    fprintf(out_file, "  \"vmschkmachine\": {\n");
    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
    fprintf(out_file, "  },\n");
}

void dump_vmstate_json_to_file(FILE *out_file)
{
    GSList *list, *elt;
    bool first;

    fprintf(out_file, "{\n");
    dump_machine_type(out_file);

    first = true;
    list = object_class_get_list(TYPE_DEVICE, true);
    for (elt = list; elt; elt = elt->next) {
        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
                                             TYPE_DEVICE);
        const char *name;
        int indent = 2;

        if (!dc->vmsd) {
            continue;
        }

        if (!first) {
            fprintf(out_file, ",\n");
        }
        name = object_class_get_name(OBJECT_CLASS(dc));
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
        indent += 2;
        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
                dc->vmsd->version_id);
        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
                dc->vmsd->minimum_version_id);

        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);

        fprintf(out_file, "\n%*s}", indent - 2, "");
        first = false;
    }
    fprintf(out_file, "\n}\n");
    fclose(out_file);
}

static int calculate_new_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    int instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (strcmp(idstr, se->idstr) == 0
            && instance_id <= se->instance_id) {
            instance_id = se->instance_id + 1;
        }
    }
    return instance_id;
}

static int calculate_compat_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    int instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->compat) {
            continue;
        }

        if (strcmp(idstr, se->compat->idstr) == 0
            && instance_id <= se->compat->instance_id) {
            instance_id = se->compat->instance_id + 1;
        }
    }
    return instance_id;
}

static inline MigrationPriority save_state_priority(SaveStateEntry *se)
{
    if (se->vmsd) {
        return se->vmsd->priority;
    }
    return MIG_PRI_DEFAULT;
}

static void savevm_state_handler_insert(SaveStateEntry *nse)
{
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *se;

    assert(priority <= MIG_PRI_MAX);

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (save_state_priority(se) < priority) {
            break;
        }
    }

    if (se) {
        QTAILQ_INSERT_BEFORE(se, nse, entry);
    } else {
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }
}

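/*
 * Note on ordering: the handlers list is kept sorted by decreasing
 * priority, and a new entry is inserted after any existing entries of
 * the same priority, so registration order is preserved within one
 * priority level.
 */
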
/* TODO: Individual devices generally have very little idea about the rest
   of the system, so instance_id should be removed/replaced.
   Meanwhile pass -1 as instance_id if you do not already have a clearly
   distinguishing id for all instances of your device class. */
int register_savevm_live(const char *idstr,
                         int instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque)
{
    SaveStateEntry *se;

    se = g_new0(SaveStateEntry, 1);
    se->version_id = version_id;
    se->section_id = savevm_state.global_section_id++;
    se->ops = ops;
    se->opaque = opaque;
    se->vmsd = NULL;
    /* if this is a live savevm handler then set is_ram */
    if (ops->save_setup != NULL) {
        se->is_ram = 1;
    }

    pstrcat(se->idstr, sizeof(se->idstr), idstr);

    if (instance_id == -1) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}

void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
{
    SaveStateEntry *se, *new_se;
    char id[256] = "";

    if (dev) {
        char *path = qdev_get_dev_path(dev);
        if (path) {
            pstrcpy(id, sizeof(id), path);
            pstrcat(id, sizeof(id), "/");
            g_free(path);
        }
    }
    pstrcat(id, sizeof(id), idstr);

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
            g_free(se->compat);
            g_free(se);
        }
    }
}

int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
                                   const VMStateDescription *vmsd,
                                   void *opaque, int alias_id,
                                   int required_for_version,
                                   Error **errp)
{
    SaveStateEntry *se;

    /* If this triggers, alias support can be dropped for the vmsd. */
    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);

    se = g_new0(SaveStateEntry, 1);
    se->version_id = vmsd->version_id;
    se->section_id = savevm_state.global_section_id++;
    se->opaque = opaque;
    se->vmsd = vmsd;
    se->alias_id = alias_id;

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
                sizeof(se->idstr)) {
                error_setg(errp, "Path too long for VMState (%s)", id);
                g_free(id);
                g_free(se);

                return -1;
            }
            g_free(id);

            se->compat = g_new0(CompatEntry, 1);
            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
            se->compat->instance_id = instance_id == -1 ?
                         calculate_compat_instance_id(vmsd->name) : instance_id;
            instance_id = -1;
        }
    }
    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);

    if (instance_id == -1) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}

void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
                        void *opaque)
{
    SaveStateEntry *se, *new_se;

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (se->vmsd == vmsd && se->opaque == opaque) {
            QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
            g_free(se->compat);
            g_free(se);
        }
    }
}

static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
{
    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {         /* Old style */
        return se->ops->load_state(f, se->opaque, se->load_version_id);
    }
    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
}

static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                   QJSON *vmdesc)
{
    int64_t old_offset, size;

    old_offset = qemu_ftell_fast(f);
    se->ops->save_state(f, se->opaque);
    size = qemu_ftell_fast(f) - old_offset;

    if (vmdesc) {
        json_prop_int(vmdesc, "size", size);
        json_start_array(vmdesc, "fields");
        json_start_object(vmdesc, NULL);
        json_prop_str(vmdesc, "name", "data");
        json_prop_int(vmdesc, "size", size);
        json_prop_str(vmdesc, "type", "buffer");
        json_end_object(vmdesc);
        json_end_array(vmdesc);
    }
}

static int vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc)
{
    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {
        vmstate_save_old_style(f, se, vmdesc);
        return 0;
    }
    return vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
}

/*
 * Write the header for a device section (QEMU_VM_SECTION START/END/PART/FULL).
 */
static void save_section_header(QEMUFile *f, SaveStateEntry *se,
                                uint8_t section_type)
{
    qemu_put_byte(f, section_type);
    qemu_put_be32(f, se->section_id);

    if (section_type == QEMU_VM_SECTION_FULL ||
        section_type == QEMU_VM_SECTION_START) {
        /* ID string */
        size_t len = strlen(se->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)se->idstr, len);

        qemu_put_be32(f, se->instance_id);
        qemu_put_be32(f, se->version_id);
    }
}

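/*
 * Illustrative layout (not normative) of a FULL/START section header as
 * written above, for a hypothetical entry with idstr "ram", instance 0,
 * version 4:
 *   byte   section_type
 *   be32   section_id
 *   byte   3              (strlen("ram"))
 *   bytes  "ram"
 *   be32   0              (instance_id)
 *   be32   4              (version_id)
 * END/PART headers stop after section_id.
 */
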
/*
 * Write a footer onto device sections; it catches cases of misformatted
 * device sections.
 */
static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    if (migrate_get_current()->send_section_footer) {
        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
        qemu_put_be32(f, se->section_id);
    }
}

/**
 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 *                           command and associated data.
 *
 * @f: File to send command on
 * @command: Command type to send
 * @len: Length of associated data
 * @data: Data associated with command.
 */
static void qemu_savevm_command_send(QEMUFile *f,
                                     enum qemu_vm_cmd command,
                                     uint16_t len,
                                     uint8_t *data)
{
    trace_savevm_command_send(command, len);
    qemu_put_byte(f, QEMU_VM_COMMAND);
    qemu_put_be16(f, (uint16_t)command);
    qemu_put_be16(f, len);
    qemu_put_buffer(f, data, len);
    qemu_fflush(f);
}

void qemu_savevm_send_colo_enable(QEMUFile *f)
{
    trace_savevm_send_colo_enable();
    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
}

void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
{
    uint32_t buf;

    trace_savevm_send_ping(value);
    buf = cpu_to_be32(value);
    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
}

void qemu_savevm_send_open_return_path(QEMUFile *f)
{
    trace_savevm_send_open_return_path();
    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}

/* We have a buffer of data to send; we don't want that all to be loaded
 * by the command itself, so the command contains just the length of the
 * extra buffer that we then send straight after it.
 * TODO: Must be a better way to organise that
 *
 * Returns:
 *    0 on success
 *    -ve on error
 */
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
{
    uint32_t tmp;

    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("%s: Unreasonably large packaged state: %zu",
                     __func__, len);
        return -1;
    }

    tmp = cpu_to_be32(len);

    trace_qemu_savevm_send_packaged();
    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);

    qemu_put_buffer(f, buf, len);

    return 0;
}

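/*
 * Illustrative wire layout (not normative) for MIG_CMD_PACKAGED as sent
 * above: a QEMU_VM_COMMAND byte, be16 command, be16 len (= 4), a be32 size
 * of the packaged stream, then the packaged bytes themselves straight
 * after the command.
 */
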
/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
    if (migrate_postcopy_ram()) {
        uint64_t tmp[2];
        tmp[0] = cpu_to_be64(ram_pagesize_summary());
        tmp[1] = cpu_to_be64(qemu_target_page_size());

        trace_qemu_savevm_send_postcopy_advise();
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
                                 16, (uint8_t *)tmp);
    } else {
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
    }
}

/* Sent prior to starting the destination running in postcopy; tells it to
 * discard pages that have already been sent but were redirtied on the source.
 * CMD_POSTCOPY_RAM_DISCARD consists of:
 *      byte   version (0)
 *      byte   Length of name field (not including 0)
 *  n x byte   RAM block name
 *      byte   0 terminator (just for safety)
 *  n x        Byte ranges within the named RAMBlock
 *      be64   Start of the range
 *      be64   Length
 *
 *  name:  RAMBlock name that these entries are part of
 *  len: Number of page entries
 *  start_list: 'len' addresses
 *  length_list: 'len' addresses
 *
 */
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
                                           uint16_t len,
                                           uint64_t *start_list,
                                           uint64_t *length_list)
{
    uint8_t *buf;
    uint16_t tmplen;
    uint16_t t;
    size_t name_len = strlen(name);

    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
    assert(name_len < 256);
    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
    buf[0] = postcopy_ram_discard_version;
    buf[1] = name_len;
    memcpy(buf + 2, name, name_len);
    tmplen = 2 + name_len;
    buf[tmplen++] = '\0';

    for (t = 0; t < len; t++) {
        stq_be_p(buf + tmplen, start_list[t]);
        tmplen += 8;
        stq_be_p(buf + tmplen, length_list[t]);
        tmplen += 8;
    }
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
    g_free(buf);
}

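/*
 * Illustrative example (not normative): discarding one 4 KiB page at
 * offset 0x2000 of a block named "pc.ram" produces the payload
 *   00 06 'p' 'c' '.' 'r' 'a' 'm' 00
 *   00 00 00 00 00 00 20 00   (be64 start)
 *   00 00 00 00 00 00 10 00   (be64 length)
 * i.e. version 0, name length 6, the name, a NUL, then one range pair.
 */
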
/* Get the destination into a state where it can receive postcopy data. */
void qemu_savevm_send_postcopy_listen(QEMUFile *f)
{
    trace_savevm_send_postcopy_listen();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
}

/* Kick the destination into running */
void qemu_savevm_send_postcopy_run(QEMUFile *f)
{
    trace_savevm_send_postcopy_run();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
}

void qemu_savevm_send_postcopy_resume(QEMUFile *f)
{
    trace_savevm_send_postcopy_resume();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
}

void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
{
    size_t len;
    char buf[256];

    trace_savevm_send_recv_bitmap(block_name);

    buf[0] = len = strlen(block_name);
    memcpy(buf + 1, block_name, len);

    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
}

bool qemu_savevm_state_blocked(Error **errp)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            error_setg(errp, "State blocked by non-migratable device '%s'",
                       se->idstr);
            return true;
        }
    }
    return false;
}

void qemu_savevm_state_header(QEMUFile *f)
{
    trace_savevm_state_header();
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    if (migrate_get_current()->send_configuration) {
        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
    }
}

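/*
 * Illustrative stream prologue (not normative): every migration stream
 * therefore starts with be32 QEMU_VM_FILE_MAGIC and be32
 * QEMU_VM_FILE_VERSION, optionally followed by a QEMU_VM_CONFIGURATION
 * byte and the "configuration" vmstate section defined above.
 */
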
int qemu_savevm_nr_failover_devices(void)
{
    SaveStateEntry *se;
    int n = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->dev_unplug_pending &&
            se->vmsd->dev_unplug_pending(se->opaque)) {
            n++;
        }
    }

    return n;
}

bool qemu_savevm_state_guest_unplug_pending(void)
{
    SaveStateEntry *se;
    int n = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->vmsd || !se->vmsd->dev_unplug_pending) {
            continue;
        }
        if (se->vmsd->dev_unplug_pending(se->opaque)) {
            n++;
        }
    }

    return n > 0;
}

void qemu_savevm_state_setup(QEMUFile *f)
{
    SaveStateEntry *se;
    Error *local_err = NULL;
    int ret;

    trace_savevm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        save_section_header(f, se, QEMU_VM_SECTION_START);

        ret = se->ops->save_setup(f, se->opaque);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            break;
        }
    }

    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
        error_report_err(local_err);
    }
}

int qemu_savevm_state_resume_prepare(MigrationState *s)
{
    SaveStateEntry *se;
    int ret;

    trace_savevm_state_resume_prepare();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->resume_prepare) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        ret = se->ops->resume_prepare(s, se->opaque);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * This function has three return values:
 *   negative: there was an error, and we have -errno.
 *   0 : We haven't finished, the caller has to call it again.
 *   1 : We have finished, we can go to the complete phase.
 */
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
    SaveStateEntry *se;
    int ret = 1;

    trace_savevm_state_iterate();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_iterate) {
            continue;
        }
        if (se->ops->is_active &&
            !se->ops->is_active(se->opaque)) {
            continue;
        }
        if (se->ops->is_active_iterate &&
            !se->ops->is_active_iterate(se->opaque)) {
            continue;
        }
        /*
         * In the postcopy phase, any device that doesn't know how to
         * do postcopy should have saved its state in the _complete
         * call that's already run; it might get confused if we call
         * iterate afterwards.
         */
        if (postcopy &&
            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
            continue;
        }
        if (qemu_file_rate_limit(f)) {
            return 0;
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_PART);

        ret = se->ops->save_live_iterate(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);

        if (ret < 0) {
            error_report("failed to save SaveStateEntry with id(name): %d(%s)",
                         se->section_id, se->idstr);
            qemu_file_set_error(f, ret);
        }
        if (ret <= 0) {
            /* Do not proceed to the next vmstate until this one has
               reported completion of the current stage. This serializes
               the migration and reduces the probability that a faster
               changing state is synchronized over and over again. */
            break;
        }
    }
    return ret;
}

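/*
 * A minimal caller sketch (see qemu_savevm_state() below for the real
 * one): keep iterating while the file is healthy and the iterate step
 * reports more work to do.
 *
 *     while (qemu_file_get_error(f) == 0) {
 *         if (qemu_savevm_state_iterate(f, false) > 0) {
 *             break;
 *         }
 *     }
 */
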
static bool should_send_vmdesc(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    bool in_postcopy = migration_in_postcopy();
    return !machine->suppress_vmdesc && !in_postcopy;
}

/*
 * Calls the save_live_complete_postcopy methods
 * causing the last few pages to be sent immediately and doing any associated
 * cleanup.
 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
 * all the other devices, but that happens at the point we switch to postcopy.
 */
void qemu_savevm_state_complete_postcopy(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_complete_postcopy) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);
        /* Section type */
        qemu_put_byte(f, QEMU_VM_SECTION_END);
        qemu_put_be32(f, se->section_id);

        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return;
        }
    }

    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);
}

static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops ||
            (in_postcopy && se->ops->has_postcopy &&
             se->ops->has_postcopy(se->opaque)) ||
            !se->ops->save_live_complete_precopy) {
            continue;
        }

        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_END);

        ret = se->ops->save_live_complete_precopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return -1;
        }
    }

    return 0;
}

static
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                    bool in_postcopy,
                                                    bool inactivate_disks)
{
    g_autoptr(QJSON) vmdesc = NULL;
    int vmdesc_len;
    SaveStateEntry *se;
    int ret;

    vmdesc = qjson_new();
    json_prop_int(vmdesc, "page_size", qemu_target_page_size());
    json_start_array(vmdesc, "devices");

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
            continue;
        }
        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
            trace_savevm_section_skip(se->idstr, se->section_id);
            continue;
        }

        trace_savevm_section_start(se->idstr, se->section_id);

        json_start_object(vmdesc, NULL);
        json_prop_str(vmdesc, "name", se->idstr);
        json_prop_int(vmdesc, "instance_id", se->instance_id);

        save_section_header(f, se, QEMU_VM_SECTION_FULL);
        ret = vmstate_save(f, se, vmdesc);
        if (ret) {
            qemu_file_set_error(f, ret);
            return ret;
        }
        trace_savevm_section_end(se->idstr, se->section_id, 0);
        save_section_footer(f, se);

        json_end_object(vmdesc);
    }

    if (inactivate_disks) {
        /* Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_invalidate_cache_all() on the other end won't fail. */
        ret = bdrv_inactivate_all();
        if (ret) {
            error_report("%s: bdrv_inactivate_all() failed (%d)",
                         __func__, ret);
            qemu_file_set_error(f, ret);
            return ret;
        }
    }
    if (!in_postcopy) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    json_end_array(vmdesc);
    qjson_finish(vmdesc);
    vmdesc_len = strlen(qjson_get_str(vmdesc));

    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, vmdesc_len);
        qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len);
    }

    return 0;
}

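/*
 * Illustrative vmdesc shape (not normative); per-device field entries are
 * filled in by vmstate_save() above and the device name here is
 * hypothetical:
 *
 *   { "page_size": 4096,
 *     "devices": [
 *       { "name": "timer", "instance_id": 0, ... },
 *       ...
 *     ] }
 */
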
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                       bool inactivate_disks)
{
    int ret;
    Error *local_err = NULL;
    bool in_postcopy = migration_in_postcopy();

    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_complete_precopy();

    cpu_synchronize_all_states();

    if (!in_postcopy || iterable_only) {
        ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
        if (ret) {
            return ret;
        }
    }

    if (iterable_only) {
        goto flush;
    }

    ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
                                                          inactivate_disks);
    if (ret) {
        return ret;
    }

flush:
    qemu_fflush(f);
    return 0;
}

/* Give an estimate of the amount left to be transferred;
 * the result is split into the amount for units that can and
 * for units that can't do postcopy.
 */
void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
                               uint64_t *res_precopy_only,
                               uint64_t *res_compatible,
                               uint64_t *res_postcopy_only)
{
    SaveStateEntry *se;

    *res_precopy_only = 0;
    *res_compatible = 0;
    *res_postcopy_only = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_pending) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        se->ops->save_live_pending(f, se->opaque, threshold_size,
                                   res_precopy_only, res_compatible,
                                   res_postcopy_only);
    }
}

void qemu_savevm_state_cleanup(void)
{
    SaveStateEntry *se;
    Error *local_err = NULL;

    if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_cleanup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->ops && se->ops->save_cleanup) {
            se->ops->save_cleanup(se->opaque);
        }
    }
}

static int qemu_savevm_state(QEMUFile *f, Error **errp)
{
    int ret;
    MigrationState *ms = migrate_get_current();
    MigrationStatus status;

    if (migration_is_setup_or_active(ms->state) ||
        ms->state == MIGRATION_STATUS_CANCELLING ||
        ms->state == MIGRATION_STATUS_COLO) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return -EINVAL;
    }

    if (migrate_use_block()) {
        error_setg(errp, "Block migration and snapshots are incompatible");
        return -EINVAL;
    }

    migrate_init(ms);
    memset(&ram_counters, 0, sizeof(ram_counters));
    ms->to_dst_file = f;

    qemu_mutex_unlock_iothread();
    qemu_savevm_state_header(f);
    qemu_savevm_state_setup(f);
    qemu_mutex_lock_iothread();

    while (qemu_file_get_error(f) == 0) {
        if (qemu_savevm_state_iterate(f, false) > 0) {
            break;
        }
    }

    ret = qemu_file_get_error(f);
    if (ret == 0) {
        qemu_savevm_state_complete_precopy(f, false, false);
        ret = qemu_file_get_error(f);
    }
    qemu_savevm_state_cleanup();
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Error while writing VM state");
    }

    if (ret != 0) {
        status = MIGRATION_STATUS_FAILED;
    } else {
        status = MIGRATION_STATUS_COMPLETED;
    }
    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);
  1344. /* f is outer parameter, it should not stay in global migration state after
  1345. * this function finished */
  1346. ms->to_dst_file = NULL;
  1347. return ret;
  1348. }
void qemu_savevm_live_state(QEMUFile *f)
{
    /* save QEMU_VM_SECTION_END section */
    qemu_savevm_state_complete_precopy(f, true, false);
    qemu_put_byte(f, QEMU_VM_EOF);
}

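/*
 * Save only the device state (RAM sections are skipped): write a FULL
 * section for every handler that has state to save, then QEMU_VM_EOF.
 * The file magic and version are only written when not already in COLO
 * state; callers such as qmp_xen_save_devices_state() rely on that.
 */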
int qemu_save_device_state(QEMUFile *f)
{
    SaveStateEntry *se;

    if (!migration_in_colo_state()) {
        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
    }
    cpu_synchronize_all_states();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        int ret;

        if (se->is_ram) {
            continue;
        }
        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
            continue;
        }
        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
            continue;
        }

        save_section_header(f, se, QEMU_VM_SECTION_FULL);

        ret = vmstate_save(f, se, NULL);
        if (ret) {
            return ret;
        }

        save_section_footer(f, se);
    }

    qemu_put_byte(f, QEMU_VM_EOF);

    return qemu_file_get_error(f);
}

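/*
 * Look up the SaveStateEntry that matches an incoming section's idstr and
 * instance id (either the real instance id or the alias id), falling back
 * to the compatibility idstr for streams sent by older QEMU versions.
 */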
static SaveStateEntry *find_se(const char *idstr, int instance_id)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!strcmp(se->idstr, idstr) &&
            (instance_id == se->instance_id ||
             instance_id == se->alias_id))
            return se;
        /* Migrating from an older version? */
        if (strstr(se->idstr, idstr) && se->compat) {
            if (!strcmp(se->compat->idstr, idstr) &&
                (instance_id == se->compat->instance_id ||
                 instance_id == se->alias_id))
                return se;
        }
    }
    return NULL;
}

enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT = 1,
};

/* ------ incoming postcopy messages ------ */
/* 'advise' arrives before any transfers just to tell us that a postcopy
 * *might* happen - it might be skipped if precopy transferred everything
 * quickly.
 */
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
                                         uint16_t len)
{
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_advise();
    if (ps != POSTCOPY_INCOMING_NONE) {
        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
        return -1;
    }

    switch (len) {
    case 0:
        if (migrate_postcopy_ram()) {
            error_report("RAM postcopy is enabled but got a 0 byte advise");
            return -EINVAL;
        }
        return 0;
    case 8 + 8:
        if (!migrate_postcopy_ram()) {
            error_report("RAM postcopy is disabled but got a 16 byte advise");
            return -EINVAL;
        }
        break;
    default:
        error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
        return -EINVAL;
    }

    if (!postcopy_ram_supported_by_host(mis)) {
        postcopy_state_set(POSTCOPY_INCOMING_NONE);
        return -1;
    }

    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
    local_pagesize_summary = ram_pagesize_summary();

    if (remote_pagesize_summary != local_pagesize_summary) {
        /*
         * This detects two potential causes of mismatch:
         *   a) A mismatch in host page sizes
         *      Some combinations of mismatch are probably possible but it
         *      gets a bit more complicated.  In particular we need to place
         *      whole host pages on the dest at once, and we need to ensure
         *      that we handle dirtying to make sure we never end up sending
         *      part of a hostpage on its own.
         *   b) The use of different huge page sizes on source/destination
         *      A finer-grained test is performed during RAM block migration,
         *      but this test here gives a nice early clear failure, and also
         *      fails when passed to an older qemu that doesn't do huge pages.
         */
        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
                     " d=%" PRIx64 ")",
                     remote_pagesize_summary, local_pagesize_summary);
        return -1;
    }

    remote_tps = qemu_get_be64(mis->from_src_file);
    if (remote_tps != qemu_target_page_size()) {
        /*
         * Again, some differences could be dealt with, but for now keep it
         * simple.
         */
        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
                     (int)remote_tps, qemu_target_page_size());
        return -1;
    }

    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    if (ram_postcopy_incoming_init(mis)) {
        return -1;
    }

    return 0;
}

/* After postcopy we will be told to throw some pages away since they're
 * dirty and will have to be demand fetched.  Must happen before CPU is
 * started.
 * There can be 0..many of these messages, each encoding multiple pages.
 */
static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
                                              uint16_t len)
{
    int tmp;
    char ramid[256];
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_ram_handle_discard();

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
        /* 1st discard */
        tmp = postcopy_ram_prepare_discard(mis);
        if (tmp) {
            return tmp;
        }
        break;

    case POSTCOPY_INCOMING_DISCARD:
        /* Expected state */
        break;

    default:
        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
                     ps);
        return -1;
    }
    /* We're expecting a
     *    Version (0)
     *    a RAM ID string (length byte, name, 0 term)
     *    then at least 1 16 byte chunk
     */
    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }

    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != postcopy_ram_discard_version) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
        return -1;
    }

    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
        return -1;
    }
    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != 0) {
        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
        return -1;
    }

    len -= 3 + strlen(ramid);
    if (len % 16) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }
    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
    while (len) {
        uint64_t start_addr, block_length;
        start_addr = qemu_get_be64(mis->from_src_file);
        block_length = qemu_get_be64(mis->from_src_file);

        len -= 16;
        int ret = ram_discard_range(ramid, start_addr, block_length);
        if (ret) {
            return ret;
        }
    }
    trace_loadvm_postcopy_ram_handle_discard_end();

    return 0;
}

/*
 * Triggered by a postcopy_listen command; this thread takes over reading
 * the input stream, leaving the main thread free to carry on loading the rest
 * of the device state (from RAM).
 * (TODO: This could do with being in a postcopy file - but there again it's
 * just another input loop, not that postcopy specific)
 */
static void *postcopy_ram_listen_thread(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    QEMUFile *f = mis->from_src_file;
    int load_res;

    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
    qemu_sem_post(&mis->listen_thread_sem);
    trace_postcopy_ram_listen_thread_start();

    rcu_register_thread();
    /*
     * Because we're a thread and not a coroutine we can't yield
     * in qemu_file, and thus we must be blocking now.
     */
    qemu_file_set_blocking(f, true);
    load_res = qemu_loadvm_state_main(f, mis);

    /*
     * This is tricky, but, mis->from_src_file can change after
     * qemu_loadvm_state_main() returns, when postcopy recovery happened.
     * In the future, we may want a wrapper for the QEMUFile handle.
     */
    f = mis->from_src_file;

    /* And non-blocking again so we don't block in any cleanup */
    qemu_file_set_blocking(f, false);

    trace_postcopy_ram_listen_thread_exit();
    if (load_res < 0) {
        error_report("%s: loadvm failed: %d", __func__, load_res);
        qemu_file_set_error(f, load_res);
        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    } else {
        /*
         * This looks good, but it's possible that the device loading in the
         * main thread hasn't finished yet, and so we might not be in 'RUN'
         * state yet; wait for the end of the main thread.
         */
        qemu_event_wait(&mis->main_thread_load_event);
    }
    postcopy_ram_incoming_cleanup(mis);

    if (load_res < 0) {
        /*
         * If something went wrong then we have a bad state so exit;
         * depending how far we got it might be possible at this point
         * to leave the guest running and fire MCEs for pages that never
         * arrived as a desperate recovery step.
         */
        rcu_unregister_thread();
        exit(EXIT_FAILURE);
    }

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    /*
     * If everything has worked fine, then the main thread has waited
     * for us to start, and we're the last use of the mis.
     * (If something broke then qemu will have to exit anyway since it's
     * got a bad migration state).
     */
    migration_incoming_state_destroy();
    qemu_loadvm_state_cleanup();

    rcu_unregister_thread();
    mis->have_listen_thread = false;
    postcopy_state_set(POSTCOPY_INCOMING_END);

    return NULL;
}

/* After this message we must be able to immediately receive postcopy data */
static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
{
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_listen();

    if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
        return -1;
    }
    if (ps == POSTCOPY_INCOMING_ADVISE) {
        /*
         * A rare case, we entered listen without having to do any discards,
         * so do the setup that's normally done at the time of the 1st
         * discard.
         */
        if (migrate_postcopy_ram()) {
            postcopy_ram_prepare_discard(mis);
        }
    }

    /*
     * Sensitise RAM - can now generate requests for blocks that don't exist.
     * However, at this point the CPU shouldn't be running, and the IO
     * shouldn't be doing anything yet so don't actually expect requests.
     */
    if (migrate_postcopy_ram()) {
        if (postcopy_ram_incoming_setup(mis)) {
            postcopy_ram_incoming_cleanup(mis);
            return -1;
        }
    }

    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    mis->have_listen_thread = true;
    /* Start up the listening thread and wait for it to signal ready */
    qemu_sem_init(&mis->listen_thread_sem, 0);
    qemu_thread_create(&mis->listen_thread, "postcopy/listen",
                       postcopy_ram_listen_thread, NULL,
                       QEMU_THREAD_DETACHED);
    qemu_sem_wait(&mis->listen_thread_sem);
    qemu_sem_destroy(&mis->listen_thread_sem);

    return 0;
}

static void loadvm_postcopy_handle_run_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* TODO we should move all of this lot into postcopy_ram.c or a shared code
     * in migration.c
     */
    cpu_synchronize_all_post_init();

    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    /* Make sure all file formats flush their mutable metadata.
     * If we get an error here, just don't restart the VM yet. */
    bdrv_invalidate_cache_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
        autostart = false;
    }

    trace_loadvm_postcopy_handle_run_cpu_sync();
    trace_loadvm_postcopy_handle_run_vmstart();

    dirty_bitmap_mig_before_vm_start();

    if (autostart) {
        /* Hold onto your hats, starting the CPU */
        vm_start();
    } else {
        /* leave it paused and let management decide when to start the CPU */
        runstate_set(RUN_STATE_PAUSED);
    }

    qemu_bh_delete(mis->bh);
}

/* After all discards we can start running and asking for pages */
static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
{
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_handle_run();
    if (ps != POSTCOPY_INCOMING_LISTENING) {
        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
        return -1;
    }

    postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
    mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
    qemu_bh_schedule(mis->bh);

    /* We need to finish reading the stream from the package
     * and also stop reading anything more from the stream that loaded the
     * package (since it's now being read by the listener thread).
     * LOADVM_QUIT will quit all the layers of nested loadvm loops.
     */
    return LOADVM_QUIT;
}

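/*
 * Handle MIG_CMD_POSTCOPY_RESUME from a source that has reconnected after
 * a paused postcopy: switch back to POSTCOPY_ACTIVE, wake the fault
 * thread, and acknowledge over the return path.
 */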
static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: illegal resume received", __func__);
        /* Don't fail the load, only for this. */
        return 0;
    }

    /*
     * This means the source VM is ready to resume the postcopy migration.
     * It's time to switch state and release the fault thread to continue
     * servicing page faults.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
    qemu_sem_post(&mis->postcopy_pause_sem_fault);

    trace_loadvm_postcopy_handle_resume();

    /* Tell source that "we are ready" */
    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);

    return 0;
}

/**
 * Immediately following this command is a blob of data containing an embedded
 * chunk of migration stream; read it and load it.  The length of the packaged
 * data is read from the stream itself.
 *
 * @mis: Incoming state
 *
 * Returns: Negative values on error
 */
static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
{
    int ret;
    size_t length;
    QIOChannelBuffer *bioc;

    length = qemu_get_be32(mis->from_src_file);
    trace_loadvm_handle_cmd_packaged(length);

    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("Unreasonably large packaged state: %zu", length);
        return -1;
    }

    bioc = qio_channel_buffer_new(length);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
    ret = qemu_get_buffer(mis->from_src_file,
                          bioc->data,
                          length);
    if (ret != length) {
        object_unref(OBJECT(bioc));
        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
                     ret, length);
        return (ret < 0) ? ret : -EAGAIN;
    }
    bioc->usage += length;
    trace_loadvm_handle_cmd_packaged_received(ret);

    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));

    ret = qemu_loadvm_state_main(packf, mis);
    trace_loadvm_handle_cmd_packaged_main(ret);
    qemu_fclose(packf);
    object_unref(OBJECT(bioc));

    return ret;
}

/*
 * Handle a request from the source for the destination's receive bitmap
 * (recved_bitmap) of a RAM block.  Payload format:
 *
 * len (1 byte) + ramblock_name (<255 bytes)
 */
static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
                                     uint16_t len)
{
    QEMUFile *file = mis->from_src_file;
    RAMBlock *rb;
    char block_name[256];
    size_t cnt;

    cnt = qemu_get_counted_string(file, block_name);
    if (!cnt) {
        error_report("%s: failed to read block name", __func__);
        return -EINVAL;
    }

    /* Validate before using the data */
    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    if (len != cnt + 1) {
        error_report("%s: invalid payload length (%d)", __func__, len);
        return -EINVAL;
    }

    rb = qemu_ram_block_by_name(block_name);
    if (!rb) {
        error_report("%s: block '%s' not found", __func__, block_name);
        return -EINVAL;
    }

    migrate_send_rp_recv_bitmap(mis, block_name);

    trace_loadvm_handle_recv_bitmap(block_name);

    return 0;
}

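/*
 * Handle MIG_CMD_ENABLE_COLO: flag the incoming migration as a COLO one
 * and set up the local RAM cache used for COLO checkpointing.
 */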
static int loadvm_process_enable_colo(MigrationIncomingState *mis)
{
    migration_incoming_enable_colo();
    return colo_init_ram_cache();
}

/*
 * Process an incoming 'QEMU_VM_COMMAND'
 * Returns:
 *   0            just a normal return
 *   LOADVM_QUIT  All good, but exit the loop
 *   <0           Error
 */
static int loadvm_process_command(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    uint16_t cmd;
    uint16_t len;
    uint32_t tmp32;

    cmd = qemu_get_be16(f);
    len = qemu_get_be16(f);

    /* Check validity before continuing to process the commands */
    if (qemu_file_get_error(f)) {
        return qemu_file_get_error(f);
    }

    trace_loadvm_process_command(cmd, len);
    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
        return -EINVAL;
    }

    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
        error_report("%s received with bad length - expecting %zu, got %d",
                     mig_cmd_args[cmd].name,
                     (size_t)mig_cmd_args[cmd].len, len);
        return -ERANGE;
    }

    switch (cmd) {
    case MIG_CMD_OPEN_RETURN_PATH:
        if (mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
            /* Not really a problem, so don't give up */
            return 0;
        }
        mis->to_src_file = qemu_file_get_return_path(f);
        if (!mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH failed");
            return -1;
        }
        break;

    case MIG_CMD_PING:
        tmp32 = qemu_get_be32(f);
        trace_loadvm_process_command_ping(tmp32);
        if (!mis->to_src_file) {
            error_report("CMD_PING (0x%x) received with no return path",
                         tmp32);
            return -1;
        }
        migrate_send_rp_pong(mis, tmp32);
        break;

    case MIG_CMD_PACKAGED:
        return loadvm_handle_cmd_packaged(mis);

    case MIG_CMD_POSTCOPY_ADVISE:
        return loadvm_postcopy_handle_advise(mis, len);

    case MIG_CMD_POSTCOPY_LISTEN:
        return loadvm_postcopy_handle_listen(mis);

    case MIG_CMD_POSTCOPY_RUN:
        return loadvm_postcopy_handle_run(mis);

    case MIG_CMD_POSTCOPY_RAM_DISCARD:
        return loadvm_postcopy_ram_handle_discard(mis, len);

    case MIG_CMD_POSTCOPY_RESUME:
        return loadvm_postcopy_handle_resume(mis);

    case MIG_CMD_RECV_BITMAP:
        return loadvm_handle_recv_bitmap(mis, len);

    case MIG_CMD_ENABLE_COLO:
        return loadvm_process_enable_colo(mis);
    }

    return 0;
}

/*
 * Read a footer off the wire and check that it matches the expected section
 *
 * Returns: true if the footer was good
 *          false if there is a problem (and calls error_report to say why)
 */
static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    int ret;
    uint8_t read_mark;
    uint32_t read_section_id;

    if (!migrate_get_current()->send_section_footer) {
        /* No footer to check */
        return true;
    }

    read_mark = qemu_get_byte(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Read section footer failed: %d",
                     __func__, ret);
        return false;
    }

    if (read_mark != QEMU_VM_SECTION_FOOTER) {
        error_report("Missing section footer for %s", se->idstr);
        return false;
    }

    read_section_id = qemu_get_be32(f);
    if (read_section_id != se->load_section_id) {
        error_report("Mismatched section id in footer for %s -"
                     " read 0x%x expected 0x%x",
                     se->idstr, read_section_id, se->load_section_id);
        return false;
    }

    /* All good */
    return true;
}

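/*
 * Load a QEMU_VM_SECTION_START or QEMU_VM_SECTION_FULL section: read the
 * section header (section id, idstr, instance and version ids), look up
 * the matching handler, load its state and verify the section footer.
 */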
static int
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t instance_id, version_id, section_id;
    SaveStateEntry *se;
    char idstr[256];
    int ret;

    /* Read section start */
    section_id = qemu_get_be32(f);
    if (!qemu_get_counted_string(f, idstr)) {
        error_report("Unable to read ID string for section %u",
                     section_id);
        return -EINVAL;
    }
    instance_id = qemu_get_be32(f);
    version_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read instance/version ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
            instance_id, version_id);
    /* Find savevm section */
    se = find_se(idstr, instance_id);
    if (se == NULL) {
        error_report("Unknown savevm section or instance '%s' %d. "
                     "Make sure that your current VM setup matches your "
                     "saved VM setup, including any hotplugged devices",
                     idstr, instance_id);
        return -EINVAL;
    }

    /* Validate version */
    if (version_id > se->version_id) {
        error_report("savevm: unsupported version %d for '%s' v%d",
                     version_id, idstr, se->version_id);
        return -EINVAL;
    }
    se->load_version_id = version_id;
    se->load_section_id = section_id;

    /* Validate if it is a device's state */
    if (xen_enabled() && se->is_ram) {
        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
        return -EINVAL;
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state for instance 0x%x of"
                     " device '%s'", instance_id, idstr);
        return ret;
    }
    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}

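/*
 * Load a QEMU_VM_SECTION_PART or QEMU_VM_SECTION_END section: the header
 * only carries the section id, which must match a section that was
 * started earlier in the stream.
 */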
static int
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t section_id;
    SaveStateEntry *se;
    int ret;

    section_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read section ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_partend(section_id);
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->load_section_id == section_id) {
            break;
        }
    }

    if (se == NULL) {
        error_report("Unknown savevm section %d", section_id);
        return -EINVAL;
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state section id %d(%s)",
                     section_id, se->idstr);
        return ret;
    }
    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}

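/*
 * Check the stream header: the file magic, the file version (the old v2
 * format is rejected), and the optional configuration section that newer
 * machine types send first.
 */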
static int qemu_loadvm_state_header(QEMUFile *f)
{
    unsigned int v;
    int ret;

    v = qemu_get_be32(f);
    if (v != QEMU_VM_FILE_MAGIC) {
        error_report("Not a migration stream");
        return -EINVAL;
    }

    v = qemu_get_be32(f);
    if (v == QEMU_VM_FILE_VERSION_COMPAT) {
        error_report("SaveVM v2 format is obsolete and doesn't work anymore");
        return -ENOTSUP;
    }
    if (v != QEMU_VM_FILE_VERSION) {
        error_report("Unsupported migration stream version");
        return -ENOTSUP;
    }

    if (migrate_get_current()->send_configuration) {
        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
            error_report("Configuration section missing");
            qemu_loadvm_state_cleanup();
            return -EINVAL;
        }
        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);

        if (ret) {
            qemu_loadvm_state_cleanup();
            return ret;
        }
    }
    return 0;
}

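/*
 * Run the load_setup hook of every active handler so they can allocate
 * resources before any section data arrives.
 */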
static int qemu_loadvm_state_setup(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    trace_loadvm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->load_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        ret = se->ops->load_setup(f, se->opaque);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            error_report("Load state of device %s failed", se->idstr);
            return ret;
        }
    }
    return 0;
}

void qemu_loadvm_state_cleanup(void)
{
    SaveStateEntry *se;

    trace_loadvm_state_cleanup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->ops && se->ops->load_cleanup) {
            se->ops->load_cleanup(se->opaque);
        }
    }
}

/* Return true if we should continue the migration, or false. */
static bool postcopy_pause_incoming(MigrationIncomingState *mis)
{
    trace_postcopy_pause_incoming();

    /* Clear the triggered bit to allow one recovery */
    mis->postcopy_recover_triggered = false;

    assert(mis->from_src_file);
    qemu_file_shutdown(mis->from_src_file);
    qemu_fclose(mis->from_src_file);
    mis->from_src_file = NULL;

    assert(mis->to_src_file);
    qemu_file_shutdown(mis->to_src_file);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_fclose(mis->to_src_file);
    mis->to_src_file = NULL;
    qemu_mutex_unlock(&mis->rp_mutex);

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_PAUSED);

    /* Notify the fault thread about the invalidated file handle */
    postcopy_fault_thread_notify(mis);

    error_report("Detected IO failure for postcopy. "
                 "Migration paused.");

    while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        qemu_sem_wait(&mis->postcopy_pause_sem_dst);
    }

    trace_postcopy_pause_incoming_continued();

    return true;
}

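/*
 * The main loop of the incoming stream: dispatch each section or command
 * until QEMU_VM_EOF or an error.  On a stream error during an active
 * postcopy, pause and retry on the recovered channel instead of failing.
 */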
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
    uint8_t section_type;
    int ret = 0;

retry:
    while (true) {
        section_type = qemu_get_byte(f);

        if (qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
            break;
        }

        trace_qemu_loadvm_state_section(section_type);
        switch (section_type) {
        case QEMU_VM_SECTION_START:
        case QEMU_VM_SECTION_FULL:
            ret = qemu_loadvm_section_start_full(f, mis);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_SECTION_PART:
        case QEMU_VM_SECTION_END:
            ret = qemu_loadvm_section_part_end(f, mis);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_COMMAND:
            ret = loadvm_process_command(f);
            trace_qemu_loadvm_state_section_command(ret);
            if ((ret < 0) || (ret == LOADVM_QUIT)) {
                goto out;
            }
            break;
        case QEMU_VM_EOF:
            /* This is the end of migration */
            goto out;
        default:
            error_report("Unknown savevm section type %d", section_type);
            ret = -EINVAL;
            goto out;
        }
    }

out:
    if (ret < 0) {
        qemu_file_set_error(f, ret);

        /*
         * If we are in the middle of an active postcopy, then we pause
         * instead of bailing out, to at least keep the VM's dirty data.
         * Note that the POSTCOPY_INCOMING_LISTENING stage is still not
         * enough, since during that stage we're still receiving device
         * states and we haven't yet started the VM on the destination.
         */
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            postcopy_pause_incoming(mis)) {
            /* Reset f to point to the newly created channel */
            f = mis->from_src_file;
            goto retry;
        }
    }
    return ret;
}

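/*
 * Load a complete VM state from f: check the header, run the handlers'
 * load_setup hooks, run the main loop, then drain the optional VMDESC
 * section so the sender never sees an early close.
 */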
int qemu_loadvm_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    int ret;

    if (qemu_savevm_state_blocked(&local_err)) {
        error_report_err(local_err);
        return -EINVAL;
    }

    ret = qemu_loadvm_state_header(f);
    if (ret) {
        return ret;
    }

    if (qemu_loadvm_state_setup(f) != 0) {
        return -EINVAL;
    }

    cpu_synchronize_all_pre_loadvm();

    ret = qemu_loadvm_state_main(f, mis);
    qemu_event_set(&mis->main_thread_load_event);

    trace_qemu_loadvm_state_post_main(ret);

    if (mis->have_listen_thread) {
        /* Listen thread still going, can't clean up yet */
        return ret;
    }

    if (ret == 0) {
        ret = qemu_file_get_error(f);
    }

    /*
     * Try to read in the VMDESC section as well, so that dumping tools that
     * intercept our migration stream have the chance to see it.
     */

    /* We've got to be careful; if we don't read the data and just shut the fd
     * then the sender can error if we close while it's still sending.
     * We also mustn't read data that isn't there; some transports (RDMA)
     * will stall waiting for that data when the source has already closed.
     */
    if (ret == 0 && should_send_vmdesc()) {
        uint8_t *buf;
        uint32_t size;
        uint8_t section_type = qemu_get_byte(f);

        if (section_type != QEMU_VM_VMDESCRIPTION) {
            error_report("Expected vmdescription section, but got %d",
                         section_type);
            /*
             * It doesn't seem worth failing at this point since
             * we apparently have an otherwise valid VM state
             */
        } else {
            buf = g_malloc(0x1000);
            size = qemu_get_be32(f);

            while (size > 0) {
                uint32_t read_chunk = MIN(size, 0x1000);
                qemu_get_buffer(f, buf, read_chunk);
                size -= read_chunk;
            }
            g_free(buf);
        }
    }

    qemu_loadvm_state_cleanup();
    cpu_synchronize_all_post_init();

    return ret;
}

int qemu_load_device_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    /* Load QEMU_VM_SECTION_FULL section */
    ret = qemu_loadvm_state_main(f, mis);
    if (ret < 0) {
        error_report("Failed to load device state: %d", ret);
        return ret;
    }

    cpu_synchronize_all_post_init();
    return 0;
}

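/*
 * Create an internal snapshot named 'name' on all snapshot-capable block
 * devices: stop the VM, write the VM state into the vmstate block device,
 * then create the disk snapshots and resume if the VM was running.
 *
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *     Error *err = NULL;
 *     if (save_snapshot("checkpoint-1", &err) < 0) {
 *         error_report_err(err);
 *     }
 */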
int save_snapshot(const char *name, Error **errp)
{
    BlockDriverState *bs, *bs1;
    QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
    int ret = -1;
    QEMUFile *f;
    int saved_vm_running;
    uint64_t vm_state_size;
    qemu_timeval tv;
    struct tm tm;
    AioContext *aio_context;

    if (migration_is_blocked(errp)) {
        return ret;
    }

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow making snapshot "
                   "right now. Try once more later.");
        return ret;
    }

    if (!bdrv_all_can_snapshot(&bs)) {
        error_setg(errp, "Device '%s' is writable but does not support "
                   "snapshots", bdrv_get_device_name(bs));
        return ret;
    }

    /* Delete old snapshots of the same name */
    if (name) {
        ret = bdrv_all_delete_snapshot(name, &bs1, errp);
        if (ret < 0) {
            error_prepend(errp, "Error while deleting snapshot on device "
                          "'%s': ", bdrv_get_device_name(bs1));
            return ret;
        }
    }

    bs = bdrv_all_find_vmstate_bs();
    if (bs == NULL) {
        error_setg(errp, "No block device can accept snapshots");
        return ret;
    }
    aio_context = bdrv_get_aio_context(bs);

    saved_vm_running = runstate_is_running();

    ret = global_state_store();
    if (ret) {
        error_setg(errp, "Error saving global state");
        return ret;
    }
    vm_stop(RUN_STATE_SAVE_VM);

    bdrv_drain_all_begin();

    aio_context_acquire(aio_context);

    memset(sn, 0, sizeof(*sn));

    /* fill auxiliary fields */
    qemu_gettimeofday(&tv);
    sn->date_sec = tv.tv_sec;
    sn->date_nsec = tv.tv_usec * 1000;
    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

    if (name) {
        ret = bdrv_snapshot_find(bs, old_sn, name);
        if (ret >= 0) {
            pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
            pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
        } else {
            pstrcpy(sn->name, sizeof(sn->name), name);
        }
    } else {
        /* cast below needed for OpenBSD where tv_sec is still 'long' */
        localtime_r((const time_t *)&tv.tv_sec, &tm);
        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
    }

    /* save the VM state */
    f = qemu_fopen_bdrv(bs, 1);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto the_end;
    }
    ret = qemu_savevm_state(f, errp);
    vm_state_size = qemu_ftell(f);
    qemu_fclose(f);
    if (ret < 0) {
        goto the_end;
    }

    /* The bdrv_all_create_snapshot() call that follows acquires the
     * AioContext for itself.  BDRV_POLL_WHILE() does not support nested
     * locking because it only releases the lock once.  Therefore synchronous
     * I/O will deadlock unless we release the AioContext before
     * bdrv_all_create_snapshot().
     */
    aio_context_release(aio_context);
    aio_context = NULL;

    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, &bs);
    if (ret < 0) {
        error_setg(errp, "Error while creating snapshot on '%s'",
                   bdrv_get_device_name(bs));
        goto the_end;
    }

    ret = 0;

the_end:
    if (aio_context) {
        aio_context_release(aio_context);
    }

    bdrv_drain_all_end();

    if (saved_vm_running) {
        vm_start();
    }
    return ret;
}

void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
                                Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int saved_vm_running;
    int ret;

    if (!has_live) {
        /* live defaults to true so an old version of the Xen tool stack can
         * have a successful live migration */
        live = true;
    }

    saved_vm_running = runstate_is_running();
    vm_stop(RUN_STATE_SAVE_VM);
    global_state_store_running();

    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp);
    if (!ioc) {
        goto the_end;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));
    ret = qemu_save_device_state(f);
    if (ret < 0 || qemu_fclose(f) < 0) {
        error_setg(errp, QERR_IO_ERROR);
    } else {
        /* libxl calls the QMP command "stop" before calling
         * "xen-save-devices-state" and in case of migration failure, libxl
         * would call "cont".
         * So call bdrv_inactivate_all (release locks) here to let the other
         * side of the migration take control of the images.
         */
        if (live && !saved_vm_running) {
            ret = bdrv_inactivate_all();
            if (ret) {
                error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
                           __func__, ret);
            }
        }
    }

the_end:
    if (saved_vm_running) {
        vm_start();
    }
}

void qmp_xen_load_devices_state(const char *filename, Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int ret;

    /* Guest must be paused before loading the device state; the RAM state
     * will already have been loaded by xc
     */
    if (runstate_is_running()) {
        error_setg(errp, "Cannot update device state while vm is running");
        return;
    }
    vm_stop(RUN_STATE_RESTORE_VM);

    ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
    if (!ioc) {
        return;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
    f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));

    ret = qemu_loadvm_state(f);
    qemu_fclose(f);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
    }
    migration_incoming_state_destroy();
}

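/*
 * Revert all snapshot-capable block devices to the snapshot 'name' and
 * restore the VM state stored with it.  Callers (e.g. the HMP "loadvm"
 * command) are expected to stop the VM before calling this.
 *
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *     Error *err = NULL;
 *     if (load_snapshot("checkpoint-1", &err) < 0) {
 *         error_report_err(err);
 *     }
 */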
int load_snapshot(const char *name, Error **errp)
{
    BlockDriverState *bs, *bs_vm_state;
    QEMUSnapshotInfo sn;
    QEMUFile *f;
    int ret;
    AioContext *aio_context;
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow loading snapshot "
                   "right now. Try once more later.");
        return -EINVAL;
    }

    if (!bdrv_all_can_snapshot(&bs)) {
        error_setg(errp,
                   "Device '%s' is writable but does not support snapshots",
                   bdrv_get_device_name(bs));
        return -ENOTSUP;
    }
    ret = bdrv_all_find_snapshot(name, &bs);
    if (ret < 0) {
        error_setg(errp,
                   "Device '%s' does not have the requested snapshot '%s'",
                   bdrv_get_device_name(bs), name);
        return ret;
    }

    bs_vm_state = bdrv_all_find_vmstate_bs();
    if (!bs_vm_state) {
        error_setg(errp, "No block device supports snapshots");
        return -ENOTSUP;
    }
    aio_context = bdrv_get_aio_context(bs_vm_state);

    /* Don't even try to load empty VM states */
    aio_context_acquire(aio_context);
    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
    aio_context_release(aio_context);
    if (ret < 0) {
        return ret;
    } else if (sn.vm_state_size == 0) {
        error_setg(errp, "This is a disk-only snapshot. Revert to it "
                   "offline using qemu-img");
        return -EINVAL;
    }

    /* Flush all IO requests so they don't interfere with the new state. */
    bdrv_drain_all_begin();

    ret = bdrv_all_goto_snapshot(name, &bs, errp);
    if (ret < 0) {
        error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
                      name, bdrv_get_device_name(bs));
        goto err_drain;
    }

    /* restore the VM state */
    f = qemu_fopen_bdrv(bs_vm_state, 0);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        ret = -EINVAL;
        goto err_drain;
    }

    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
    mis->from_src_file = f;

    aio_context_acquire(aio_context);
    ret = qemu_loadvm_state(f);
    migration_incoming_state_destroy();
    aio_context_release(aio_context);

    bdrv_drain_all_end();

    if (ret < 0) {
        error_setg(errp, "Error %d while loading VM state", ret);
        return ret;
    }

    return 0;

err_drain:
    bdrv_drain_all_end();
    return ret;
}

void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
{
    qemu_ram_set_idstr(mr->ram_block,
                       memory_region_name(mr), dev);
    qemu_ram_set_migratable(mr->ram_block);
}

void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
{
    qemu_ram_unset_idstr(mr->ram_block);
    qemu_ram_unset_migratable(mr->ram_block);
}

void vmstate_register_ram_global(MemoryRegion *mr)
{
    vmstate_register_ram(mr, NULL);
}

bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
{
    /* check needed if --only-migratable is specified */
    if (!only_migratable) {
        return true;
    }

    return !(vmsd && vmsd->unmigratable);
}