savevm.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "hw/boards.h"
#include "net/net.h"
#include "migration.h"
#include "migration/snapshot.h"
#include "migration/vmstate.h"
#include "migration/misc.h"
#include "migration/register.h"
#include "migration/global_state.h"
#include "migration/channel-block.h"
#include "ram.h"
#include "qemu-file.h"
#include "savevm.h"
#include "postcopy-ram.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-builtin-visit.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/job.h"
#include "qemu/main-loop.h"
#include "block/snapshot.h"
#include "qemu/cutils.h"
#include "io/channel-buffer.h"
#include "io/channel-file.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "sysemu/xen.h"
#include "migration/colo.h"
#include "qemu/bitmap.h"
#include "net/announce.h"
#include "qemu/yank.h"
#include "yank_functions.h"
#include "sysemu/qtest.h"

const unsigned int postcopy_ram_discard_version;
/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
    MIG_CMD_INVALID = 0,           /* Must be 0 */
    MIG_CMD_OPEN_RETURN_PATH,      /* Tell the dest to open the Return path */
    MIG_CMD_PING,                  /* Request a PONG on the RP */

    MIG_CMD_POSTCOPY_ADVISE,       /* Prior to any page transfers, just
                                      warn we might want to do PC */
    MIG_CMD_POSTCOPY_LISTEN,       /* Start listening for incoming
                                      pages as it's running. */
    MIG_CMD_POSTCOPY_RUN,          /* Start execution */

    MIG_CMD_POSTCOPY_RAM_DISCARD,  /* A list of pages to discard that
                                      were previously sent during
                                      precopy but are dirty. */
    MIG_CMD_PACKAGED,              /* Send a wrapped stream within this stream */
    MIG_CMD_ENABLE_COLO,           /* Enable COLO */
    MIG_CMD_POSTCOPY_RESUME,       /* resume postcopy on dest */
    MIG_CMD_RECV_BITMAP,           /* Request for recved bitmap on dst */
    MIG_CMD_MAX
};

#define MAX_VM_CMD_PACKAGED_SIZE UINT32_MAX
static struct mig_cmd_args {
    ssize_t len; /* -1 = variable */
    const char *name;
} mig_cmd_args[] = {
    [MIG_CMD_INVALID]          = { .len = -1, .name = "INVALID" },
    [MIG_CMD_OPEN_RETURN_PATH] = { .len =  0, .name = "OPEN_RETURN_PATH" },
    [MIG_CMD_PING]             = { .len = sizeof(uint32_t), .name = "PING" },
    [MIG_CMD_POSTCOPY_ADVISE]  = { .len = -1, .name = "POSTCOPY_ADVISE" },
    [MIG_CMD_POSTCOPY_LISTEN]  = { .len =  0, .name = "POSTCOPY_LISTEN" },
    [MIG_CMD_POSTCOPY_RUN]     = { .len =  0, .name = "POSTCOPY_RUN" },
    [MIG_CMD_POSTCOPY_RAM_DISCARD] = {
                                   .len = -1, .name = "POSTCOPY_RAM_DISCARD" },
    [MIG_CMD_POSTCOPY_RESUME]  = { .len =  0, .name = "POSTCOPY_RESUME" },
    [MIG_CMD_PACKAGED]         = { .len =  4, .name = "PACKAGED" },
    [MIG_CMD_RECV_BITMAP]      = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_CMD_MAX]              = { .len = -1, .name = "MAX" },
};
/* Note for MIG_CMD_POSTCOPY_ADVISE:
 * The format of the arguments depends on the postcopy mode:
 * - postcopy RAM only
 *   uint64_t host page size
 *   uint64_t target page size
 *
 * - postcopy RAM and postcopy dirty bitmaps
 *   format is the same as for postcopy RAM only
 *
 * - postcopy dirty bitmaps only
 *   Nothing. Command length field is 0.
 *
 * Be careful: adding a new postcopy entity with some other parameters should
 * not break format self-description ability. A good way is to introduce some
 * generic extendable format with an exception for the two old entities.
 */
/***********************************************************/
/* savevm/loadvm support */

static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
{
    if (is_writable) {
        return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
    } else {
        return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
    }
}

/* QEMUFile timer support.
 * Not in qemu-file.c to not add qemu-timer.c as dependency to qemu-file.c
 */

void timer_put(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = timer_expire_time_ns(ts);
    qemu_put_be64(f, expire_time);
}

void timer_get(QEMUFile *f, QEMUTimer *ts)
{
    uint64_t expire_time;

    expire_time = qemu_get_be64(f);
    if (expire_time != -1) {
        timer_mod_ns(ts, expire_time);
    } else {
        timer_del(ts);
    }
}
/* VMState timer support.
 * Not in vmstate.c to not add qemu-timer.c as dependency to vmstate.c
 */

static int get_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field)
{
    QEMUTimer *v = pv;
    timer_get(f, v);
    return 0;
}

static int put_timer(QEMUFile *f, void *pv, size_t size,
                     const VMStateField *field, JSONWriter *vmdesc)
{
    QEMUTimer *v = pv;
    timer_put(f, v);
    return 0;
}

const VMStateInfo vmstate_info_timer = {
    .name = "timer",
    .get  = get_timer,
    .put  = put_timer,
};
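
/*
 * Illustrative sketch (not part of this file): a device would normally reach
 * vmstate_info_timer through the VMSTATE_TIMER_PTR() macro from
 * migration/vmstate.h. "mydev", "my_timer" and "MyDevState" below are
 * hypothetical names:
 *
 *     static const VMStateDescription vmstate_mydev = {
 *         .name = "mydev",
 *         .version_id = 1,
 *         .minimum_version_id = 1,
 *         .fields = (VMStateField[]) {
 *             VMSTATE_TIMER_PTR(my_timer, MyDevState),
 *             VMSTATE_END_OF_LIST()
 *         }
 *     };
 */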
typedef struct CompatEntry {
    char idstr[256];
    int instance_id;
} CompatEntry;

typedef struct SaveStateEntry {
    QTAILQ_ENTRY(SaveStateEntry) entry;
    char idstr[256];
    uint32_t instance_id;
    int alias_id;
    int version_id;
    /* version id read from the stream */
    int load_version_id;
    int section_id;
    /* section id read from the stream */
    int load_section_id;
    const SaveVMHandlers *ops;
    const VMStateDescription *vmsd;
    void *opaque;
    CompatEntry *compat;
    int is_ram;
} SaveStateEntry;

typedef struct SaveState {
    QTAILQ_HEAD(, SaveStateEntry) handlers;
    SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
    int global_section_id;
    uint32_t len;
    const char *name;
    uint32_t target_page_bits;
    uint32_t caps_count;
    MigrationCapability *capabilities;
    QemuUUID uuid;
} SaveState;

static SaveState savevm_state = {
    .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
    .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
    .global_section_id = 0,
};
static bool should_validate_capability(int capability)
{
    assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
    /* Validate only new capabilities to keep compatibility. */
    switch (capability) {
    case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
        return true;
    default:
        return false;
    }
}

static uint32_t get_validatable_capabilities_count(void)
{
    MigrationState *s = migrate_get_current();
    uint32_t result = 0;
    int i;

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
            result++;
        }
    }
    return result;
}

static int configuration_pre_save(void *opaque)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
    MigrationState *s = migrate_get_current();
    int i, j;

    state->len = strlen(current_name);
    state->name = current_name;
    state->target_page_bits = qemu_target_page_bits();

    state->caps_count = get_validatable_capabilities_count();
    state->capabilities = g_renew(MigrationCapability, state->capabilities,
                                  state->caps_count);
    for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (should_validate_capability(i) && s->enabled_capabilities[i]) {
            state->capabilities[j++] = i;
        }
    }
    state->uuid = qemu_uuid;

    return 0;
}
static int configuration_post_save(void *opaque)
{
    SaveState *state = opaque;

    g_free(state->capabilities);
    state->capabilities = NULL;
    state->caps_count = 0;
    return 0;
}

static int configuration_pre_load(void *opaque)
{
    SaveState *state = opaque;

    /* If there is no target-page-bits subsection it means the source
     * predates the variable-target-page-bits support and is using the
     * minimum possible value for this CPU.
     */
    state->target_page_bits = qemu_target_page_bits_min();
    return 0;
}

static bool configuration_validate_capabilities(SaveState *state)
{
    bool ret = true;
    MigrationState *s = migrate_get_current();
    unsigned long *source_caps_bm;
    int i;

    source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
    for (i = 0; i < state->caps_count; i++) {
        MigrationCapability capability = state->capabilities[i];
        set_bit(capability, source_caps_bm);
    }

    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        bool source_state, target_state;
        if (!should_validate_capability(i)) {
            continue;
        }
        source_state = test_bit(i, source_caps_bm);
        target_state = s->enabled_capabilities[i];
        if (source_state != target_state) {
            error_report("Capability %s is %s, but received capability is %s",
                         MigrationCapability_str(i),
                         target_state ? "on" : "off",
                         source_state ? "on" : "off");
            ret = false;
            /* Don't break here to report all failed capabilities */
        }
    }

    g_free(source_caps_bm);
    return ret;
}

static int configuration_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
    int ret = 0;

    if (strncmp(state->name, current_name, state->len) != 0) {
        error_report("Machine type received is '%.*s' and local is '%s'",
                     (int) state->len, state->name, current_name);
        ret = -EINVAL;
        goto out;
    }

    if (state->target_page_bits != qemu_target_page_bits()) {
        error_report("Received TARGET_PAGE_BITS is %d but local is %d",
                     state->target_page_bits, qemu_target_page_bits());
        ret = -EINVAL;
        goto out;
    }

    if (!configuration_validate_capabilities(state)) {
        ret = -EINVAL;
        goto out;
    }

out:
    g_free((void *)state->name);
    state->name = NULL;
    state->len = 0;
    g_free(state->capabilities);
    state->capabilities = NULL;
    state->caps_count = 0;

    return ret;
}
static int get_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field)
{
    MigrationCapability *capability = pv;
    char capability_str[UINT8_MAX + 1];
    uint8_t len;
    int i;

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)capability_str, len);
    capability_str[len] = '\0';
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
        if (!strcmp(MigrationCapability_str(i), capability_str)) {
            *capability = i;
            return 0;
        }
    }
    error_report("Received unknown capability %s", capability_str);
    return -EINVAL;
}

static int put_capability(QEMUFile *f, void *pv, size_t size,
                          const VMStateField *field, JSONWriter *vmdesc)
{
    MigrationCapability *capability = pv;
    const char *capability_str = MigrationCapability_str(*capability);
    size_t len = strlen(capability_str);

    assert(len <= UINT8_MAX);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)capability_str, len);
    return 0;
}

static const VMStateInfo vmstate_info_capability = {
    .name = "capability",
    .get  = get_capability,
    .put  = put_capability,
};
/* The target-page-bits subsection is present only if the
 * target page size is not the same as the default (ie the
 * minimum page size for a variable-page-size guest CPU).
 * If it is present then it contains the actual target page
 * bits for the machine, and migration will fail if the
 * two ends don't agree about it.
 */
static bool vmstate_target_page_bits_needed(void *opaque)
{
    return qemu_target_page_bits()
        > qemu_target_page_bits_min();
}

static const VMStateDescription vmstate_target_page_bits = {
    .name = "configuration/target-page-bits",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_target_page_bits_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(target_page_bits, SaveState),
        VMSTATE_END_OF_LIST()
    }
};
static bool vmstate_capabilites_needed(void *opaque)
{
    return get_validatable_capabilities_count() > 0;
}

static const VMStateDescription vmstate_capabilites = {
    .name = "configuration/capabilities",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_capabilites_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_V(caps_count, SaveState, 1),
        VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
                                    vmstate_info_capability,
                                    MigrationCapability),
        VMSTATE_END_OF_LIST()
    }
};
static bool vmstate_uuid_needed(void *opaque)
{
    return qemu_uuid_set && migrate_validate_uuid();
}

static int vmstate_uuid_post_load(void *opaque, int version_id)
{
    SaveState *state = opaque;
    char uuid_src[UUID_FMT_LEN + 1];
    char uuid_dst[UUID_FMT_LEN + 1];

    if (!qemu_uuid_set) {
        /*
         * This is only a warning because the user might not know the UUID
         * in some cases, e.g. when loading an old snapshot.
         */
        qemu_uuid_unparse(&state->uuid, uuid_src);
        warn_report("UUID %s was received, but the local UUID isn't set",
                    uuid_src);
        return 0;
    }

    if (!qemu_uuid_is_equal(&state->uuid, &qemu_uuid)) {
        qemu_uuid_unparse(&state->uuid, uuid_src);
        qemu_uuid_unparse(&qemu_uuid, uuid_dst);
        error_report("UUID received is %s and local is %s", uuid_src, uuid_dst);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_uuid = {
    .name = "configuration/uuid",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmstate_uuid_needed,
    .post_load = vmstate_uuid_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY_V(uuid.data, SaveState, sizeof(QemuUUID), 1),
        VMSTATE_END_OF_LIST()
    }
};
static const VMStateDescription vmstate_configuration = {
    .name = "configuration",
    .version_id = 1,
    .pre_load = configuration_pre_load,
    .post_load = configuration_post_load,
    .pre_save = configuration_pre_save,
    .post_save = configuration_post_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(len, SaveState),
        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription *[]) {
        &vmstate_target_page_bits,
        &vmstate_capabilites,
        &vmstate_uuid,
        NULL
    }
};
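
/*
 * Illustrative sketch of what the "configuration" section carries on the
 * wire, per the fields above (the machine name is an example, not fixed):
 *   be32   len                 strlen of the machine-type name
 *   bytes  name                e.g. "pc-q35-8.0"
 * plus any of the subsections above whose .needed callback returned true
 * (target-page-bits, capabilities, uuid).
 */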
static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection);

static void dump_vmstate_vmsf(FILE *out_file, const VMStateField *field,
                              int indent)
{
    fprintf(out_file, "%*s{\n", indent, "");
    indent += 2;
    fprintf(out_file, "%*s\"field\": \"%s\",\n", indent, "", field->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            field->version_id);
    fprintf(out_file, "%*s\"field_exists\": %s,\n", indent, "",
            field->field_exists ? "true" : "false");
    fprintf(out_file, "%*s\"size\": %zu", indent, "", field->size);
    if (field->vmsd != NULL) {
        fprintf(out_file, ",\n");
        dump_vmstate_vmsd(out_file, field->vmsd, indent, false);
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}

static void dump_vmstate_vmss(FILE *out_file,
                              const VMStateDescription **subsection,
                              int indent)
{
    if (*subsection != NULL) {
        dump_vmstate_vmsd(out_file, *subsection, indent, true);
    }
}

static void dump_vmstate_vmsd(FILE *out_file,
                              const VMStateDescription *vmsd, int indent,
                              bool is_subsection)
{
    if (is_subsection) {
        fprintf(out_file, "%*s{\n", indent, "");
    } else {
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", "Description");
    }
    indent += 2;
    fprintf(out_file, "%*s\"name\": \"%s\",\n", indent, "", vmsd->name);
    fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
            vmsd->version_id);
    fprintf(out_file, "%*s\"minimum_version_id\": %d", indent, "",
            vmsd->minimum_version_id);
    if (vmsd->fields != NULL) {
        const VMStateField *field = vmsd->fields;
        bool first;

        fprintf(out_file, ",\n%*s\"Fields\": [\n", indent, "");
        first = true;
        while (field->name != NULL) {
            if (field->flags & VMS_MUST_EXIST) {
                /* Ignore VMSTATE_VALIDATE bits; these don't get migrated */
                field++;
                continue;
            }
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmsf(out_file, field, indent + 2);
            field++;
            first = false;
        }
        assert(field->flags == VMS_END);
        fprintf(out_file, "\n%*s]", indent, "");
    }
    if (vmsd->subsections != NULL) {
        const VMStateDescription **subsection = vmsd->subsections;
        bool first;

        fprintf(out_file, ",\n%*s\"Subsections\": [\n", indent, "");
        first = true;
        while (*subsection != NULL) {
            if (!first) {
                fprintf(out_file, ",\n");
            }
            dump_vmstate_vmss(out_file, subsection, indent + 2);
            subsection++;
            first = false;
        }
        fprintf(out_file, "\n%*s]", indent, "");
    }
    fprintf(out_file, "\n%*s}", indent - 2, "");
}
static void dump_machine_type(FILE *out_file)
{
    MachineClass *mc;

    mc = MACHINE_GET_CLASS(current_machine);

    fprintf(out_file, "  \"vmschkmachine\": {\n");
    fprintf(out_file, "    \"Name\": \"%s\"\n", mc->name);
    fprintf(out_file, "  },\n");
}

void dump_vmstate_json_to_file(FILE *out_file)
{
    GSList *list, *elt;
    bool first;

    fprintf(out_file, "{\n");
    dump_machine_type(out_file);

    first = true;
    list = object_class_get_list(TYPE_DEVICE, true);
    for (elt = list; elt; elt = elt->next) {
        DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
                                             TYPE_DEVICE);
        const char *name;
        int indent = 2;

        if (!dc->vmsd) {
            continue;
        }

        if (!first) {
            fprintf(out_file, ",\n");
        }
        name = object_class_get_name(OBJECT_CLASS(dc));
        fprintf(out_file, "%*s\"%s\": {\n", indent, "", name);
        indent += 2;
        fprintf(out_file, "%*s\"Name\": \"%s\",\n", indent, "", name);
        fprintf(out_file, "%*s\"version_id\": %d,\n", indent, "",
                dc->vmsd->version_id);
        fprintf(out_file, "%*s\"minimum_version_id\": %d,\n", indent, "",
                dc->vmsd->minimum_version_id);

        dump_vmstate_vmsd(out_file, dc->vmsd, indent, false);

        fprintf(out_file, "\n%*s}", indent - 2, "");
        first = false;
    }
    fprintf(out_file, "\n}\n");
    fclose(out_file);
    g_slist_free(list);
}
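
/*
 * Illustrative shape of the JSON emitted above (abridged; device names,
 * machine type and version numbers are examples that depend on the build):
 *
 * {
 *   "vmschkmachine": { "Name": "pc-q35-8.0" },
 *   "fw_cfg": {
 *     "Name": "fw_cfg",
 *     "version_id": 2,
 *     "minimum_version_id": 1,
 *     "Description": {
 *       "name": "fw_cfg",
 *       "Fields": [ { "field": "...", "version_id": 1, "size": 4 } ]
 *     }
 *   }
 * }
 */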
static uint32_t calculate_new_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    uint32_t instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (strcmp(idstr, se->idstr) == 0
            && instance_id <= se->instance_id) {
            instance_id = se->instance_id + 1;
        }
    }
    /* Make sure we never loop over without being noticed */
    assert(instance_id != VMSTATE_INSTANCE_ID_ANY);

    return instance_id;
}

static int calculate_compat_instance_id(const char *idstr)
{
    SaveStateEntry *se;
    int instance_id = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->compat) {
            continue;
        }

        if (strcmp(idstr, se->compat->idstr) == 0
            && instance_id <= se->compat->instance_id) {
            instance_id = se->compat->instance_id + 1;
        }
    }
    return instance_id;
}

static inline MigrationPriority save_state_priority(SaveStateEntry *se)
{
    if (se->vmsd) {
        return se->vmsd->priority;
    }
    return MIG_PRI_DEFAULT;
}
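
/*
 * The handlers list is kept sorted with higher-priority entries first, and
 * handler_pri_head[] caches the first entry of each priority band. An
 * insertion therefore only scans the small array of priorities for the next
 * occupied band below its own, rather than walking the whole handlers list.
 */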
static void savevm_state_handler_insert(SaveStateEntry *nse)
{
    MigrationPriority priority = save_state_priority(nse);
    SaveStateEntry *se;
    int i;

    assert(priority <= MIG_PRI_MAX);

    for (i = priority - 1; i >= 0; i--) {
        se = savevm_state.handler_pri_head[i];
        if (se != NULL) {
            assert(save_state_priority(se) < priority);
            break;
        }
    }

    if (i >= 0) {
        QTAILQ_INSERT_BEFORE(se, nse, entry);
    } else {
        QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
    }

    if (savevm_state.handler_pri_head[priority] == NULL) {
        savevm_state.handler_pri_head[priority] = nse;
    }
}

static void savevm_state_handler_remove(SaveStateEntry *se)
{
    SaveStateEntry *next;
    MigrationPriority priority = save_state_priority(se);

    if (se == savevm_state.handler_pri_head[priority]) {
        next = QTAILQ_NEXT(se, entry);
        if (next != NULL && save_state_priority(next) == priority) {
            savevm_state.handler_pri_head[priority] = next;
        } else {
            savevm_state.handler_pri_head[priority] = NULL;
        }
    }
    QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
}
/* TODO: Individual devices generally have very little idea about the rest
   of the system, so instance_id should be removed/replaced.
   Meanwhile pass -1 as instance_id if you do not already have a clearly
   distinguishing id for all instances of your device class. */
int register_savevm_live(const char *idstr,
                         uint32_t instance_id,
                         int version_id,
                         const SaveVMHandlers *ops,
                         void *opaque)
{
    SaveStateEntry *se;

    se = g_new0(SaveStateEntry, 1);
    se->version_id = version_id;
    se->section_id = savevm_state.global_section_id++;
    se->ops = ops;
    se->opaque = opaque;
    se->vmsd = NULL;
    /* if this is a live savevm handler then set is_ram */
    if (ops->save_setup != NULL) {
        se->is_ram = 1;
    }

    pstrcat(se->idstr, sizeof(se->idstr), idstr);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}
void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque)
{
    SaveStateEntry *se, *new_se;
    char id[256] = "";

    if (obj) {
        char *oid = vmstate_if_get_id(obj);
        if (oid) {
            pstrcpy(id, sizeof(id), oid);
            pstrcat(id, sizeof(id), "/");
            g_free(oid);
        }
    }
    pstrcat(id, sizeof(id), idstr);

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}
/*
 * Perform some basic checks on vmsds at registration
 * time.
 */
static void vmstate_check(const VMStateDescription *vmsd)
{
    const VMStateField *field = vmsd->fields;
    const VMStateDescription **subsection = vmsd->subsections;

    if (field) {
        while (field->name) {
            if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
                /* Recurse to sub structures */
                vmstate_check(field->vmsd);
            }
            /* Carry on */
            field++;
        }
        /* Check for the end of field list canary */
        if (field->flags != VMS_END) {
            error_report("VMSTATE not ending with VMS_END: %s", vmsd->name);
            g_assert_not_reached();
        }
    }

    while (subsection && *subsection) {
        /*
         * The name of a subsection should start with the name of the
         * current object.
         */
        assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
        vmstate_check(*subsection);
        subsection++;
    }
}
int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                   const VMStateDescription *vmsd,
                                   void *opaque, int alias_id,
                                   int required_for_version,
                                   Error **errp)
{
    SaveStateEntry *se;

    /* If this triggers, alias support can be dropped for the vmsd. */
    assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);

    se = g_new0(SaveStateEntry, 1);
    se->version_id = vmsd->version_id;
    se->section_id = savevm_state.global_section_id++;
    se->opaque = opaque;
    se->vmsd = vmsd;
    se->alias_id = alias_id;

    if (obj) {
        char *id = vmstate_if_get_id(obj);
        if (id) {
            if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >=
                sizeof(se->idstr)) {
                error_setg(errp, "Path too long for VMState (%s)", id);
                g_free(id);
                g_free(se);
                return -1;
            }
            g_free(id);

            se->compat = g_new0(CompatEntry, 1);
            pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name);
            se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ?
                calculate_compat_instance_id(vmsd->name) : instance_id;
            instance_id = VMSTATE_INSTANCE_ID_ANY;
        }
    }
    pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);

    if (instance_id == VMSTATE_INSTANCE_ID_ANY) {
        se->instance_id = calculate_new_instance_id(se->idstr);
    } else {
        se->instance_id = instance_id;
    }

    /* Perform a recursive sanity check during the test runs */
    if (qtest_enabled()) {
        vmstate_check(vmsd);
    }
    assert(!se->compat || se->instance_id == 0);
    savevm_state_handler_insert(se);
    return 0;
}
void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
                        void *opaque)
{
    SaveStateEntry *se, *new_se;

    QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
        if (se->vmsd == vmsd && se->opaque == opaque) {
            savevm_state_handler_remove(se);
            g_free(se->compat);
            g_free(se);
        }
    }
}

static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
{
    trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) { /* Old style */
        return se->ops->load_state(f, se->opaque, se->load_version_id);
    }
    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
}
static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
                                   JSONWriter *vmdesc)
{
    int64_t old_offset, size;

    old_offset = qemu_file_total_transferred_fast(f);
    se->ops->save_state(f, se->opaque);
    size = qemu_file_total_transferred_fast(f) - old_offset;

    if (vmdesc) {
        json_writer_int64(vmdesc, "size", size);
        json_writer_start_array(vmdesc, "fields");
        json_writer_start_object(vmdesc, NULL);
        json_writer_str(vmdesc, "name", "data");
        json_writer_int64(vmdesc, "size", size);
        json_writer_str(vmdesc, "type", "buffer");
        json_writer_end_object(vmdesc);
        json_writer_end_array(vmdesc);
    }
}
/*
 * Write the header for a device section (QEMU_VM_SECTION START/END/PART/FULL)
 */
static void save_section_header(QEMUFile *f, SaveStateEntry *se,
                                uint8_t section_type)
{
    qemu_put_byte(f, section_type);
    qemu_put_be32(f, se->section_id);

    if (section_type == QEMU_VM_SECTION_FULL ||
        section_type == QEMU_VM_SECTION_START) {
        /* ID string */
        size_t len = strlen(se->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)se->idstr, len);

        qemu_put_be32(f, se->instance_id);
        qemu_put_be32(f, se->version_id);
    }
}
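
/*
 * Illustrative on-wire layout of a FULL/START section header, as produced
 * above (a PART/END header stops after section_id):
 *   byte   section_type       QEMU_VM_SECTION_{START,PART,END,FULL}
 *   be32   section_id
 *   byte   len                strlen(idstr)
 *   bytes  idstr              e.g. "ram" (example only)
 *   be32   instance_id
 *   be32   version_id
 */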
/*
 * Write a footer onto device sections that catches cases of misformatted
 * device sections.
 */
static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    if (migrate_get_current()->send_section_footer) {
        qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
        qemu_put_be32(f, se->section_id);
    }
}
static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
{
    int ret;

    if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
        return 0;
    }
    if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
        trace_savevm_section_skip(se->idstr, se->section_id);
        return 0;
    }

    trace_savevm_section_start(se->idstr, se->section_id);
    save_section_header(f, se, QEMU_VM_SECTION_FULL);
    if (vmdesc) {
        json_writer_start_object(vmdesc, NULL);
        json_writer_str(vmdesc, "name", se->idstr);
        json_writer_int64(vmdesc, "instance_id", se->instance_id);
    }

    trace_vmstate_save(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
    if (!se->vmsd) {
        vmstate_save_old_style(f, se, vmdesc);
    } else {
        ret = vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
        if (ret) {
            return ret;
        }
    }

    trace_savevm_section_end(se->idstr, se->section_id, 0);
    save_section_footer(f, se);
    if (vmdesc) {
        json_writer_end_object(vmdesc);
    }
    return 0;
}
/**
 * qemu_savevm_command_send: Send a 'QEMU_VM_COMMAND' type element with the
 *                           command and associated data.
 *
 * @f: File to send command on
 * @command: Command type to send
 * @len: Length of associated data
 * @data: Data associated with command.
 */
static void qemu_savevm_command_send(QEMUFile *f,
                                     enum qemu_vm_cmd command,
                                     uint16_t len,
                                     uint8_t *data)
{
    trace_savevm_command_send(command, len);
    qemu_put_byte(f, QEMU_VM_COMMAND);
    qemu_put_be16(f, (uint16_t)command);
    qemu_put_be16(f, len);
    qemu_put_buffer(f, data, len);
    qemu_fflush(f);
}
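
/*
 * Illustrative encoding produced by the function above for a PING (a sketch;
 * the QEMU_VM_COMMAND byte value itself is defined in savevm.h):
 *   byte   QEMU_VM_COMMAND
 *   be16   MIG_CMD_PING
 *   be16   4                  payload length, per mig_cmd_args[]
 *   be32   value              the cookie requested back as a PONG
 */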
void qemu_savevm_send_colo_enable(QEMUFile *f)
{
    trace_savevm_send_colo_enable();
    qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
}

void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
{
    uint32_t buf;

    trace_savevm_send_ping(value);
    buf = cpu_to_be32(value);
    qemu_savevm_command_send(f, MIG_CMD_PING, sizeof(value), (uint8_t *)&buf);
}

void qemu_savevm_send_open_return_path(QEMUFile *f)
{
    trace_savevm_send_open_return_path();
    qemu_savevm_command_send(f, MIG_CMD_OPEN_RETURN_PATH, 0, NULL);
}
/* We have a buffer of data to send; we don't want that all to be loaded
 * by the command itself, so the command contains just the length of the
 * extra buffer that we then send straight after it.
 * TODO: Must be a better way to organise that
 *
 * Returns:
 *    0 on success
 *    -ve on error
 */
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
{
    uint32_t tmp;

    if (len > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("%s: Unreasonably large packaged state: %zu",
                     __func__, len);
        return -1;
    }

    tmp = cpu_to_be32(len);

    trace_qemu_savevm_send_packaged();
    qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);

    qemu_put_buffer(f, buf, len);

    return 0;
}
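
/*
 * Illustrative stream layout for MIG_CMD_PACKAGED, per the function above:
 * the command element carries only the be32 length, and the wrapped stream
 * follows as raw bytes outside the command framing:
 *   byte QEMU_VM_COMMAND,  be16 MIG_CMD_PACKAGED,  be16 4,  be32 len
 *   len x byte             the packaged sub-stream
 */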
/* Send prior to any postcopy transfer */
void qemu_savevm_send_postcopy_advise(QEMUFile *f)
{
    if (migrate_postcopy_ram()) {
        uint64_t tmp[2];
        tmp[0] = cpu_to_be64(ram_pagesize_summary());
        tmp[1] = cpu_to_be64(qemu_target_page_size());
        trace_qemu_savevm_send_postcopy_advise();
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE,
                                 16, (uint8_t *)tmp);
    } else {
        qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 0, NULL);
    }
}
/* Sent prior to starting the destination running in postcopy; discards pages
 * that have already been sent but redirtied on the source.
 * CMD_POSTCOPY_RAM_DISCARD consists of:
 *      byte   version (0)
 *      byte   Length of name field (not including 0)
 *  n x byte   RAM block name
 *      byte   0 terminator (just for safety)
 *  n x        Byte ranges within the named RAMBlock
 *      be64   Start of the range
 *      be64   Length
 *
 *  name:  RAMBlock name that these entries are part of
 *  len: Number of page entries
 *  start_list: 'len' addresses
 *  length_list: 'len' addresses
 *
 */
void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
                                           uint16_t len,
                                           uint64_t *start_list,
                                           uint64_t *length_list)
{
    uint8_t *buf;
    uint16_t tmplen;
    uint16_t t;
    size_t name_len = strlen(name);

    trace_qemu_savevm_send_postcopy_ram_discard(name, len);
    assert(name_len < 256);
    buf = g_malloc0(1 + 1 + name_len + 1 + (8 + 8) * len);
    buf[0] = postcopy_ram_discard_version;
    buf[1] = name_len;
    memcpy(buf + 2, name, name_len);
    tmplen = 2 + name_len;
    buf[tmplen++] = '\0';

    for (t = 0; t < len; t++) {
        stq_be_p(buf + tmplen, start_list[t]);
        tmplen += 8;
        stq_be_p(buf + tmplen, length_list[t]);
        tmplen += 8;
    }
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RAM_DISCARD, tmplen, buf);
    g_free(buf);
}
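
/*
 * Illustrative payload for a discard of one 4KiB page at offset 0x2000 of a
 * block named "pc.ram", following the format comment above (example values
 * only):
 *   00                          version
 *   06                          name length
 *   "pc.ram" 00                 name + terminator
 *   be64 0x2000   be64 0x1000   one start/length pair
 */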
/* Get the destination into a state where it can receive postcopy data. */
void qemu_savevm_send_postcopy_listen(QEMUFile *f)
{
    trace_savevm_send_postcopy_listen();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_LISTEN, 0, NULL);
}

/* Kick the destination into running */
void qemu_savevm_send_postcopy_run(QEMUFile *f)
{
    trace_savevm_send_postcopy_run();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL);
}

void qemu_savevm_send_postcopy_resume(QEMUFile *f)
{
    trace_savevm_send_postcopy_resume();
    qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL);
}

void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name)
{
    size_t len;
    char buf[256];

    trace_savevm_send_recv_bitmap(block_name);

    buf[0] = len = strlen(block_name);
    memcpy(buf + 1, block_name, len);

    qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)buf);
}
bool qemu_savevm_state_blocked(Error **errp)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            error_setg(errp, "State blocked by non-migratable device '%s'",
                       se->idstr);
            return true;
        }
    }
    return false;
}

void qemu_savevm_non_migratable_list(strList **reasons)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->unmigratable) {
            QAPI_LIST_PREPEND(*reasons,
                              g_strdup_printf("non-migratable device: %s",
                                              se->idstr));
        }
    }
}
void qemu_savevm_state_header(QEMUFile *f)
{
    trace_savevm_state_header();
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    if (migrate_get_current()->send_configuration) {
        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
    }
}
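
/*
 * Illustrative layout of the start of a migration stream, per the function
 * above (the magic and version constants are defined in savevm.h):
 *   be32  QEMU_VM_FILE_MAGIC
 *   be32  QEMU_VM_FILE_VERSION
 *   byte  QEMU_VM_CONFIGURATION      (only if send_configuration)
 *   ...   "configuration" vmstate    machine name plus subsections
 * followed by the device sections written by the setup, iterate and
 * complete phases below.
 */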
bool qemu_savevm_state_guest_unplug_pending(void)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->dev_unplug_pending &&
            se->vmsd->dev_unplug_pending(se->opaque)) {
            return true;
        }
    }

    return false;
}
void qemu_savevm_state_setup(QEMUFile *f)
{
    MigrationState *ms = migrate_get_current();
    SaveStateEntry *se;
    Error *local_err = NULL;
    int ret;

    ms->vmdesc = json_writer_new(false);
    json_writer_start_object(ms->vmdesc, NULL);
    json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
    json_writer_start_array(ms->vmdesc, "devices");

    trace_savevm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->early_setup) {
            ret = vmstate_save(f, se, ms->vmdesc);
            if (ret) {
                qemu_file_set_error(f, ret);
                break;
            }
            continue;
        }

        if (!se->ops || !se->ops->save_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        save_section_header(f, se, QEMU_VM_SECTION_START);

        ret = se->ops->save_setup(f, se->opaque);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            break;
        }
    }

    if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
        error_report_err(local_err);
    }
}
int qemu_savevm_state_resume_prepare(MigrationState *s)
{
    SaveStateEntry *se;
    int ret;

    trace_savevm_state_resume_prepare();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->resume_prepare) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        ret = se->ops->resume_prepare(s, se->opaque);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
/*
 * this function has three return values:
 *   negative: there was an error, and we have -errno.
 *   0 : We haven't finished, the caller has to go again
 *   1 : We have finished, we can go to complete phase
 */
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
    SaveStateEntry *se;
    int ret = 1;

    trace_savevm_state_iterate();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_iterate) {
            continue;
        }
        if (se->ops->is_active &&
            !se->ops->is_active(se->opaque)) {
            continue;
        }
        if (se->ops->is_active_iterate &&
            !se->ops->is_active_iterate(se->opaque)) {
            continue;
        }
        /*
         * In the postcopy phase, any device that doesn't know how to
         * do postcopy should have saved its state in the _complete
         * call that's already run; it might get confused if we call
         * iterate afterwards.
         */
        if (postcopy &&
            !(se->ops->has_postcopy && se->ops->has_postcopy(se->opaque))) {
            continue;
        }
        if (qemu_file_rate_limit(f)) {
            return 0;
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_PART);

        ret = se->ops->save_live_iterate(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);

        if (ret < 0) {
            error_report("failed to save SaveStateEntry with id(name): "
                         "%d(%s): %d",
                         se->section_id, se->idstr, ret);
            qemu_file_set_error(f, ret);
        }
        if (ret <= 0) {
            /* Do not proceed to the next vmstate before this one reported
               completion of the current stage. This serializes the migration
               and reduces the probability that a faster changing state is
               synchronized over and over again. */
            break;
        }
    }
    return ret;
}
static bool should_send_vmdesc(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    bool in_postcopy = migration_in_postcopy();
    return !machine->suppress_vmdesc && !in_postcopy;
}

/*
 * Calls the save_live_complete_postcopy methods
 * causing the last few pages to be sent immediately and doing any associated
 * cleanup.
 * Note postcopy also calls qemu_savevm_state_complete_precopy to complete
 * all the other devices, but that happens at the point we switch to postcopy.
 */
void qemu_savevm_state_complete_postcopy(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->save_live_complete_postcopy) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);
        /* Section type */
        qemu_put_byte(f, QEMU_VM_SECTION_END);
        qemu_put_be32(f, se->section_id);

        ret = se->ops->save_live_complete_postcopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return;
        }
    }

    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);
}
static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops ||
            (in_postcopy && se->ops->has_postcopy &&
             se->ops->has_postcopy(se->opaque)) ||
            !se->ops->save_live_complete_precopy) {
            continue;
        }

        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        trace_savevm_section_start(se->idstr, se->section_id);

        save_section_header(f, se, QEMU_VM_SECTION_END);

        ret = se->ops->save_live_complete_precopy(f, se->opaque);
        trace_savevm_section_end(se->idstr, se->section_id, ret);
        save_section_footer(f, se);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            return -1;
        }
    }

    return 0;
}
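/*
 * Save the non-iterable device state (everything registered with a vmsd),
 * append QEMU_VM_EOF unless the postcopy stream continues, and optionally
 * send the JSON description of the device states that were just written.
 */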
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                    bool in_postcopy,
                                                    bool inactivate_disks)
{
    MigrationState *ms = migrate_get_current();
    JSONWriter *vmdesc = ms->vmdesc;
    int vmdesc_len;
    SaveStateEntry *se;
    int ret;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->vmsd && se->vmsd->early_setup) {
            /* Already saved during qemu_savevm_state_setup(). */
            continue;
        }

        ret = vmstate_save(f, se, vmdesc);
        if (ret) {
            qemu_file_set_error(f, ret);
            return ret;
        }
    }

    if (inactivate_disks) {
        /* Inactivate before sending QEMU_VM_EOF so that the
         * bdrv_activate_all() on the other end won't fail. */
        ret = bdrv_inactivate_all();
        if (ret) {
            error_report("%s: bdrv_inactivate_all() failed (%d)",
                         __func__, ret);
            qemu_file_set_error(f, ret);
            return ret;
        }
    }
    if (!in_postcopy) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    json_writer_end_array(vmdesc);
    json_writer_end_object(vmdesc);
    vmdesc_len = strlen(json_writer_get(vmdesc));

    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, vmdesc_len);
        qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
    }

    /* Free it now to detect any inconsistencies. */
    json_writer_free(vmdesc);
    ms->vmdesc = NULL;

    return 0;
}
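/*
 * Top-level completion for the precopy stage: flush CPU state, finish the
 * iterable devices, and (unless iterable_only is set) the non-iterable
 * ones too.
 */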
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                       bool inactivate_disks)
{
    int ret;
    Error *local_err = NULL;
    bool in_postcopy = migration_in_postcopy();

    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_complete_precopy();

    cpu_synchronize_all_states();

    if (!in_postcopy || iterable_only) {
        ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
        if (ret) {
            return ret;
        }
    }

    if (iterable_only) {
        goto flush;
    }

    ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
                                                          inactivate_disks);
    if (ret) {
        return ret;
    }

flush:
    qemu_fflush(f);
    return 0;
}
/* Give an estimate of the amount of data left to be transferred;
 * the result is split between the units that can and the units
 * that cannot do postcopy.
 */
void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
                                        uint64_t *can_postcopy)
{
    SaveStateEntry *se;

    *must_precopy = 0;
    *can_postcopy = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->state_pending_estimate) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        se->ops->state_pending_estimate(se->opaque, must_precopy, can_postcopy);
    }
}
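/*
 * As qemu_savevm_state_pending_estimate(), but each device computes an
 * exact figure, which may involve extra work (e.g. re-syncing dirty
 * bitmaps), so callers are presumably expected to use it more sparingly.
 */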
void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
                                     uint64_t *can_postcopy)
{
    SaveStateEntry *se;

    *must_precopy = 0;
    *can_postcopy = 0;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->state_pending_exact) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }
        se->ops->state_pending_exact(se->opaque, must_precopy, can_postcopy);
    }
}
void qemu_savevm_state_cleanup(void)
{
    SaveStateEntry *se;
    Error *local_err = NULL;

    if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
        error_report_err(local_err);
    }

    trace_savevm_state_cleanup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->ops && se->ops->save_cleanup) {
            se->ops->save_cleanup(se->opaque);
        }
    }
}
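/*
 * Write a complete VM state to @f in one go. This is the snapshot path:
 * the caller is expected to have stopped the VM already, so the iteration
 * loop below should converge quickly.
 */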
static int qemu_savevm_state(QEMUFile *f, Error **errp)
{
    int ret;
    MigrationState *ms = migrate_get_current();
    MigrationStatus status;

    if (migration_is_running(ms->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return -EINVAL;
    }

    if (migrate_use_block()) {
        error_setg(errp, "Block migration and snapshots are incompatible");
        return -EINVAL;
    }

    migrate_init(ms);
    memset(&ram_counters, 0, sizeof(ram_counters));
    memset(&compression_counters, 0, sizeof(compression_counters));
    ms->to_dst_file = f;

    qemu_mutex_unlock_iothread();
    qemu_savevm_state_header(f);
    qemu_savevm_state_setup(f);
    qemu_mutex_lock_iothread();

    while (qemu_file_get_error(f) == 0) {
        if (qemu_savevm_state_iterate(f, false) > 0) {
            break;
        }
    }

    ret = qemu_file_get_error(f);
    if (ret == 0) {
        qemu_savevm_state_complete_precopy(f, false, false);
        ret = qemu_file_get_error(f);
    }
    qemu_savevm_state_cleanup();
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Error while writing VM state");
    }

    if (ret != 0) {
        status = MIGRATION_STATUS_FAILED;
    } else {
        status = MIGRATION_STATUS_COMPLETED;
    }
    migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status);

    /* f is an outer parameter; it must not stay in the global migration
     * state after this function has finished */
    ms->to_dst_file = NULL;

    return ret;
}
void qemu_savevm_live_state(QEMUFile *f)
{
    /* save QEMU_VM_SECTION_END section */
    qemu_savevm_state_complete_precopy(f, true, false);
    qemu_put_byte(f, QEMU_VM_EOF);
}
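/*
 * Save only the device state (RAM sections are skipped) to @f. Used by
 * xen-save-devices-state and by COLO, which is also why the file magic
 * and version are skipped while in COLO state.
 */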
int qemu_save_device_state(QEMUFile *f)
{
    SaveStateEntry *se;

    if (!migration_in_colo_state()) {
        qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
        qemu_put_be32(f, QEMU_VM_FILE_VERSION);
    }
    cpu_synchronize_all_states();

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        int ret;

        if (se->is_ram) {
            continue;
        }
        ret = vmstate_save(f, se, NULL);
        if (ret) {
            return ret;
        }
    }

    qemu_put_byte(f, QEMU_VM_EOF);

    return qemu_file_get_error(f);
}
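/*
 * Look up a SaveStateEntry by ID string and instance number, also
 * matching the compat ID so that streams from older QEMU versions can
 * still be resolved.
 */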
static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id)
{
    SaveStateEntry *se;

    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!strcmp(se->idstr, idstr) &&
            (instance_id == se->instance_id ||
             instance_id == se->alias_id))
            return se;
        /* Migrating from an older version? */
        if (strstr(se->idstr, idstr) && se->compat) {
            if (!strcmp(se->compat->idstr, idstr) &&
                (instance_id == se->compat->instance_id ||
                 instance_id == se->alias_id))
                return se;
        }
    }
    return NULL;
}
enum LoadVMExitCodes {
    /* Allow a command to quit all layers of nested loadvm loops */
    LOADVM_QUIT = 1,
};

/* ------ incoming postcopy messages ------ */

/* 'advise' arrives before any transfers just to tell us that a postcopy
 * *might* happen - it might be skipped if precopy transferred everything
 * quickly.
 */
static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
                                         uint16_t len)
{
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
    uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
    size_t page_size = qemu_target_page_size();
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_advise();
    if (ps != POSTCOPY_INCOMING_NONE) {
        error_report("CMD_POSTCOPY_ADVISE in wrong postcopy state (%d)", ps);
        return -1;
    }

    switch (len) {
    case 0:
        if (migrate_postcopy_ram()) {
            error_report("RAM postcopy is enabled but got a 0-byte advise");
            return -EINVAL;
        }
        return 0;
    case 8 + 8:
        if (!migrate_postcopy_ram()) {
            error_report("RAM postcopy is disabled but got a 16-byte advise");
            return -EINVAL;
        }
        break;
    default:
        error_report("CMD_POSTCOPY_ADVISE invalid length (%d)", len);
        return -EINVAL;
    }

    if (!postcopy_ram_supported_by_host(mis)) {
        postcopy_state_set(POSTCOPY_INCOMING_NONE);
        return -1;
    }

    remote_pagesize_summary = qemu_get_be64(mis->from_src_file);
    local_pagesize_summary = ram_pagesize_summary();

    if (remote_pagesize_summary != local_pagesize_summary) {
        /*
         * This detects two potential causes of mismatch:
         *   a) A mismatch in host page sizes
         *      Some combinations of mismatch are probably possible but it
         *      gets a bit more complicated. In particular we need to place
         *      whole host pages on the dest at once, and we need to ensure
         *      that we handle dirtying to make sure we never end up sending
         *      part of a hostpage on its own.
         *   b) The use of different huge page sizes on source/destination
         *      A finer-grained test is performed during RAM block migration,
         *      but this test here causes a nice early clear failure, and
         *      also fails when passed to an older qemu that doesn't
         *      do huge pages.
         */
        error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64
                     " d=%" PRIx64 ")",
                     remote_pagesize_summary, local_pagesize_summary);
        return -1;
    }

    remote_tps = qemu_get_be64(mis->from_src_file);
    if (remote_tps != page_size) {
        /*
         * Again, some differences could be dealt with, but for now keep it
         * simple.
         */
        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
                     (int)remote_tps, page_size);
        return -1;
    }

    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    if (ram_postcopy_incoming_init(mis)) {
        return -1;
    }

    return 0;
}
/* After postcopy we will be told to throw some pages away since they're
 * dirty and will have to be demand fetched. Must happen before CPU is
 * started.
 * There can be 0..many of these messages, each encoding multiple pages.
 */
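/*
 * Payload layout, as parsed below:
 *
 *   byte     version (postcopy_ram_discard_version)
 *   byte     RAM ID string length
 *   bytes    RAM ID string
 *   byte     0 terminator
 *   repeat:  be64 start address, be64 block length (16 bytes per chunk)
 */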
static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
                                              uint16_t len)
{
    int tmp;
    char ramid[256];
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_ram_handle_discard();

    switch (ps) {
    case POSTCOPY_INCOMING_ADVISE:
        /* 1st discard */
        tmp = postcopy_ram_prepare_discard(mis);
        if (tmp) {
            return tmp;
        }
        break;

    case POSTCOPY_INCOMING_DISCARD:
        /* Expected state */
        break;

    default:
        error_report("CMD_POSTCOPY_RAM_DISCARD in wrong postcopy state (%d)",
                     ps);
        return -1;
    }
    /* We're expecting a
     *    Version (0)
     *    a RAM ID string (length byte, name, 0 term)
     *    then at least one 16-byte chunk
     */
    if (len < (1 + 1 + 1 + 1 + 2 * 8)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }

    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != postcopy_ram_discard_version) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid version (%d)", tmp);
        return -1;
    }

    if (!qemu_get_counted_string(mis->from_src_file, ramid)) {
        error_report("CMD_POSTCOPY_RAM_DISCARD Failed to read RAMBlock ID");
        return -1;
    }
    tmp = qemu_get_byte(mis->from_src_file);
    if (tmp != 0) {
        error_report("CMD_POSTCOPY_RAM_DISCARD missing nil (%d)", tmp);
        return -1;
    }

    len -= 3 + strlen(ramid);
    if (len % 16) {
        error_report("CMD_POSTCOPY_RAM_DISCARD invalid length (%d)", len);
        return -1;
    }
    trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
    while (len) {
        uint64_t start_addr, block_length;
        start_addr = qemu_get_be64(mis->from_src_file);
        block_length = qemu_get_be64(mis->from_src_file);

        len -= 16;
        int ret = ram_discard_range(ramid, start_addr, block_length);
        if (ret) {
            return ret;
        }
    }
    trace_loadvm_postcopy_ram_handle_discard_end();

    return 0;
}
/*
 * Triggered by a postcopy_listen command; this thread takes over reading
 * the input stream, leaving the main thread free to carry on loading the rest
 * of the device state (from RAM).
 * (TODO: This could do with being in a postcopy file - but there again it's
 * just another input loop, not that postcopy specific)
 */
static void *postcopy_ram_listen_thread(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    QEMUFile *f = mis->from_src_file;
    int load_res;
    MigrationState *migr = migrate_get_current();

    object_ref(OBJECT(migr));

    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
    qemu_sem_post(&mis->thread_sync_sem);
    trace_postcopy_ram_listen_thread_start();

    rcu_register_thread();
    /*
     * Because we're a thread and not a coroutine we can't yield
     * in qemu_file, and thus we must be blocking now.
     */
    qemu_file_set_blocking(f, true);
    load_res = qemu_loadvm_state_main(f, mis);

    /*
     * This is tricky, but, mis->from_src_file can change after it
     * returns, when postcopy recovery happened. In the future, we may
     * want a wrapper for the QEMUFile handle.
     */
    f = mis->from_src_file;

    /* And non-blocking again so we don't block in any cleanup */
    qemu_file_set_blocking(f, false);

    trace_postcopy_ram_listen_thread_exit();
    if (load_res < 0) {
        qemu_file_set_error(f, load_res);
        dirty_bitmap_mig_cancel_incoming();
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            !migrate_postcopy_ram() && migrate_dirty_bitmaps())
        {
            error_report("%s: loadvm failed during postcopy: %d. All states "
                         "are migrated except dirty bitmaps. Some dirty "
                         "bitmaps may be lost, but the dirty bitmaps that "
                         "were migrated are valid.",
                         __func__, load_res);
            load_res = 0; /* prevent further exit() */
        } else {
            error_report("%s: loadvm failed: %d", __func__, load_res);
            migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
        }
    }
    if (load_res >= 0) {
        /*
         * This looks good, but it's possible that the device loading in the
         * main thread hasn't finished yet, and so we might not be in 'RUN'
         * state yet; wait for the end of the main thread.
         */
        qemu_event_wait(&mis->main_thread_load_event);
    }
    postcopy_ram_incoming_cleanup(mis);

    if (load_res < 0) {
        /*
         * If something went wrong then we have a bad state so exit;
         * depending how far we got it might be possible at this point
         * to leave the guest running and fire MCEs for pages that never
         * arrived as a desperate recovery step.
         */
        rcu_unregister_thread();
        exit(EXIT_FAILURE);
    }

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    /*
     * If everything has worked fine, then the main thread has waited
     * for us to start, and we're the last use of the mis.
     * (If something broke then qemu will have to exit anyway since it's
     * got a bad migration state).
     */
    migration_incoming_state_destroy();
    qemu_loadvm_state_cleanup();

    rcu_unregister_thread();
    mis->have_listen_thread = false;
    postcopy_state_set(POSTCOPY_INCOMING_END);

    object_unref(OBJECT(migr));

    return NULL;
}
/* After this message we must be able to immediately receive postcopy data */
static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
{
    PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING);
    Error *local_err = NULL;

    trace_loadvm_postcopy_handle_listen("enter");

    if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) {
        error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps);
        return -1;
    }
    if (ps == POSTCOPY_INCOMING_ADVISE) {
        /*
         * A rare case, we entered listen without having to do any discards,
         * so do the setup that's normally done at the time of the 1st discard.
         */
        if (migrate_postcopy_ram()) {
            postcopy_ram_prepare_discard(mis);
        }
    }

    trace_loadvm_postcopy_handle_listen("after discard");

    /*
     * Sensitise RAM - can now generate requests for blocks that don't exist.
     * However, at this point the CPU shouldn't be running, and the IO
     * shouldn't be doing anything yet, so don't actually expect requests.
     */
    if (migrate_postcopy_ram()) {
        if (postcopy_ram_incoming_setup(mis)) {
            postcopy_ram_incoming_cleanup(mis);
            return -1;
        }
    }

    trace_loadvm_postcopy_handle_listen("after uffd");

    if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) {
        error_report_err(local_err);
        return -1;
    }

    mis->have_listen_thread = true;
    postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen",
                           postcopy_ram_listen_thread, QEMU_THREAD_DETACHED);
    trace_loadvm_postcopy_handle_listen("return");

    return 0;
}
static void loadvm_postcopy_handle_run_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    trace_loadvm_postcopy_handle_run_bh("enter");

    /* TODO we should move all of this lot into postcopy_ram.c or a shared code
     * in migration.c
     */
    cpu_synchronize_all_post_init();

    trace_loadvm_postcopy_handle_run_bh("after cpu sync");

    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    trace_loadvm_postcopy_handle_run_bh("after announce");

    /* Make sure all file formats throw away their mutable metadata.
     * If we get an error here, just don't restart the VM yet. */
    bdrv_activate_all(&local_err);
    if (local_err) {
        error_report_err(local_err);
        local_err = NULL;
        autostart = false;
    }

    trace_loadvm_postcopy_handle_run_bh("after invalidate cache");

    dirty_bitmap_mig_before_vm_start();

    if (autostart) {
        /* Hold onto your hats, starting the CPU */
        vm_start();
    } else {
        /* leave it paused and let management decide when to start the CPU */
        runstate_set(RUN_STATE_PAUSED);
    }

    qemu_bh_delete(mis->bh);

    trace_loadvm_postcopy_handle_run_bh("return");
}
/* After all discards we can start running and asking for pages */
static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
{
    PostcopyState ps = postcopy_state_get();

    trace_loadvm_postcopy_handle_run();
    if (ps != POSTCOPY_INCOMING_LISTENING) {
        error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps);
        return -1;
    }

    postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
    mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
    qemu_bh_schedule(mis->bh);

    /* We need to finish reading the stream from the package
     * and also stop reading anything more from the stream that loaded the
     * package (since it's now being read by the listener thread).
     * LOADVM_QUIT will quit all the layers of nested loadvm loops.
     */
    return LOADVM_QUIT;
}
/* Must be called with the page_request_mutex held */
static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
                                       gpointer data)
{
    MigrationIncomingState *mis = data;
    void *host_addr = (void *) key;
    ram_addr_t rb_offset;
    RAMBlock *rb;
    int ret;

    rb = qemu_ram_block_from_host(host_addr, true, &rb_offset);
    if (!rb) {
        /*
         * This should _never_ happen. However be nice for a migrating VM to
         * not crash/assert. Post an error (note: intended to not use *_once
         * because we do want to see all the illegal addresses; and this can
         * never be triggered by the guest so we're safe) and move on to the
         * next entry.
         */
        error_report("%s: illegal host addr %p", __func__, host_addr);
        /* Try the next entry */
        return FALSE;
    }

    ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset);
    if (ret) {
        /* Please refer to above comment. */
        error_report("%s: send rp message failed for addr %p",
                     __func__, host_addr);
        return FALSE;
    }

    trace_postcopy_page_req_sync(host_addr);

    return FALSE;
}
static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
{
    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
    }
}
static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: illegal resume received", __func__);
        /* Don't fail the load, only for this. */
        return 0;
    }

    /*
     * Reset the last_rb before we resend any page req to source again, since
     * the source should have it reset already.
     */
    mis->last_rb = NULL;

    /*
     * This means the source VM is ready to resume the postcopy migration.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    trace_loadvm_postcopy_handle_resume();

    /* Tell source that "we are ready" */
    migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);

    /*
     * After a postcopy recovery, the source should have lost the postcopy
     * queue, or potentially the requested pages could have been lost during
     * the network down phase. Let's re-sync with the source VM by re-sending
     * all the pending pages that we eagerly need, so these threads won't get
     * blocked too long due to the recovery.
     *
     * Without this procedure, the faulted destination VM threads (waiting for
     * page requests right before the postcopy is interrupted) can keep hanging
     * until the pages are sent by the source during the background copying of
     * pages, or another thread faulted on the same address accidentally.
     */
    migrate_send_rp_req_pages_pending(mis);

    /*
     * It's time to switch state and release the fault thread to continue
     * servicing page faults. Note that this should be explicitly after the
     * above call to migrate_send_rp_req_pages_pending(). In short:
     * migrate_send_rp_message_req_pages() is not thread-safe, yet.
     */
    qemu_sem_post(&mis->postcopy_pause_sem_fault);

    if (migrate_postcopy_preempt()) {
        /*
         * The preempt channel will be created in async manner; now let's
         * wait for it and make sure it's created.
         */
        qemu_sem_wait(&mis->postcopy_qemufile_dst_done);
        assert(mis->postcopy_qemufile_dst);
        /* Kick the fast ram load thread too */
        qemu_sem_post(&mis->postcopy_pause_sem_fast_load);
    }

    return 0;
}
/**
 * Immediately following this command is a blob of data containing an embedded
 * chunk of migration stream; read it and load it.
 *
 * @mis: Incoming state
 *
 * Note: the length of the packaged data is read off the wire rather than
 * passed in as a parameter.
 *
 * Returns: Negative values on error
 */
static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
{
    int ret;
    size_t length;
    QIOChannelBuffer *bioc;

    length = qemu_get_be32(mis->from_src_file);
    trace_loadvm_handle_cmd_packaged(length);

    if (length > MAX_VM_CMD_PACKAGED_SIZE) {
        error_report("Unreasonably large packaged state: %zu", length);
        return -1;
    }

    bioc = qio_channel_buffer_new(length);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer");
    ret = qemu_get_buffer(mis->from_src_file,
                          bioc->data,
                          length);
    if (ret != length) {
        object_unref(OBJECT(bioc));
        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
                     ret, length);
        return (ret < 0) ? ret : -EAGAIN;
    }
    bioc->usage += length;
    trace_loadvm_handle_cmd_packaged_received(ret);

    QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));

    ret = qemu_loadvm_state_main(packf, mis);
    trace_loadvm_handle_cmd_packaged_main(ret);
    qemu_fclose(packf);
    object_unref(OBJECT(bioc));

    return ret;
}
/*
 * Handle a request from the source for the recved_bitmap on the
 * destination. Payload format:
 *
 * len (1 byte) + ramblock_name (<255 bytes)
 */
static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
                                     uint16_t len)
{
    QEMUFile *file = mis->from_src_file;
    RAMBlock *rb;
    char block_name[256];
    size_t cnt;

    cnt = qemu_get_counted_string(file, block_name);
    if (!cnt) {
        error_report("%s: failed to read block name", __func__);
        return -EINVAL;
    }

    /* Validate before using the data */
    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    if (len != cnt + 1) {
        error_report("%s: invalid payload length (%d)", __func__, len);
        return -EINVAL;
    }

    rb = qemu_ram_block_by_name(block_name);
    if (!rb) {
        error_report("%s: block '%s' not found", __func__, block_name);
        return -EINVAL;
    }

    migrate_send_rp_recv_bitmap(mis, block_name);

    trace_loadvm_handle_recv_bitmap(block_name);

    return 0;
}
static int loadvm_process_enable_colo(MigrationIncomingState *mis)
{
    int ret = migration_incoming_enable_colo();

    if (!ret) {
        ret = colo_init_ram_cache();
        if (ret) {
            migration_incoming_disable_colo();
        }
    }
    return ret;
}
/*
 * Process an incoming 'QEMU_VM_COMMAND'
 * Returns: 0            just a normal return
 *          LOADVM_QUIT  All good, but exit the loop
 *          <0           Error
 */
static int loadvm_process_command(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    uint16_t cmd;
    uint16_t len;
    uint32_t tmp32;

    cmd = qemu_get_be16(f);
    len = qemu_get_be16(f);

    /* Check validity before continuing to process cmds */
    if (qemu_file_get_error(f)) {
        return qemu_file_get_error(f);
    }

    if (cmd >= MIG_CMD_MAX || cmd == MIG_CMD_INVALID) {
        error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len);
        return -EINVAL;
    }

    trace_loadvm_process_command(mig_cmd_args[cmd].name, len);

    if (mig_cmd_args[cmd].len != -1 && mig_cmd_args[cmd].len != len) {
        error_report("%s received with bad length - expecting %zu, got %d",
                     mig_cmd_args[cmd].name,
                     (size_t)mig_cmd_args[cmd].len, len);
        return -ERANGE;
    }

    switch (cmd) {
    case MIG_CMD_OPEN_RETURN_PATH:
        if (mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH called when RP already open");
            /* Not really a problem, so don't give up */
            return 0;
        }
        mis->to_src_file = qemu_file_get_return_path(f);
        if (!mis->to_src_file) {
            error_report("CMD_OPEN_RETURN_PATH failed");
            return -1;
        }
        break;

    case MIG_CMD_PING:
        tmp32 = qemu_get_be32(f);
        trace_loadvm_process_command_ping(tmp32);
        if (!mis->to_src_file) {
            error_report("CMD_PING (0x%x) received with no return path",
                         tmp32);
            return -1;
        }
        migrate_send_rp_pong(mis, tmp32);
        break;

    case MIG_CMD_PACKAGED:
        return loadvm_handle_cmd_packaged(mis);

    case MIG_CMD_POSTCOPY_ADVISE:
        return loadvm_postcopy_handle_advise(mis, len);

    case MIG_CMD_POSTCOPY_LISTEN:
        return loadvm_postcopy_handle_listen(mis);

    case MIG_CMD_POSTCOPY_RUN:
        return loadvm_postcopy_handle_run(mis);

    case MIG_CMD_POSTCOPY_RAM_DISCARD:
        return loadvm_postcopy_ram_handle_discard(mis, len);

    case MIG_CMD_POSTCOPY_RESUME:
        return loadvm_postcopy_handle_resume(mis);

    case MIG_CMD_RECV_BITMAP:
        return loadvm_handle_recv_bitmap(mis, len);

    case MIG_CMD_ENABLE_COLO:
        return loadvm_process_enable_colo(mis);
    }

    return 0;
}
/*
 * Read a footer off the wire and check that it matches the expected section
 *
 * Returns: true if the footer was good
 *          false if there is a problem (and calls error_report to say why)
 */
static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
{
    int ret;
    uint8_t read_mark;
    uint32_t read_section_id;

    if (!migrate_get_current()->send_section_footer) {
        /* No footer to check */
        return true;
    }

    read_mark = qemu_get_byte(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Read section footer failed: %d",
                     __func__, ret);
        return false;
    }

    if (read_mark != QEMU_VM_SECTION_FOOTER) {
        error_report("Missing section footer for %s", se->idstr);
        return false;
    }

    read_section_id = qemu_get_be32(f);
    if (read_section_id != se->load_section_id) {
        error_report("Mismatched section id in footer for %s -"
                     " read 0x%x expected 0x%x",
                     se->idstr, read_section_id, se->load_section_id);
        return false;
    }

    /* All good */
    return true;
}
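/*
 * Read and load a QEMU_VM_SECTION_START or QEMU_VM_SECTION_FULL section:
 * parse the header (section id, ID string, instance and version), find
 * the matching SaveStateEntry and hand the payload to vmstate_load().
 */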
static int
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t instance_id, version_id, section_id;
    SaveStateEntry *se;
    char idstr[256];
    int ret;

    /* Read section start */
    section_id = qemu_get_be32(f);
    if (!qemu_get_counted_string(f, idstr)) {
        error_report("Unable to read ID string for section %u",
                     section_id);
        return -EINVAL;
    }
    instance_id = qemu_get_be32(f);
    version_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read instance/version ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_startfull(section_id, idstr,
                                              instance_id, version_id);
    /* Find savevm section */
    se = find_se(idstr, instance_id);
    if (se == NULL) {
        error_report("Unknown savevm section or instance '%s' %"PRIu32". "
                     "Make sure that your current VM setup matches your "
                     "saved VM setup, including any hotplugged devices",
                     idstr, instance_id);
        return -EINVAL;
    }

    /* Validate version */
    if (version_id > se->version_id) {
        error_report("savevm: unsupported version %d for '%s' v%d",
                     version_id, idstr, se->version_id);
        return -EINVAL;
    }
    se->load_version_id = version_id;
    se->load_section_id = section_id;

    /* Validate if it is a device's state */
    if (xen_enabled() && se->is_ram) {
        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
        return -EINVAL;
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state for instance 0x%"PRIx32" of"
                     " device '%s'", instance_id, idstr);
        return ret;
    }
    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}
static int
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
{
    uint32_t section_id;
    SaveStateEntry *se;
    int ret;

    section_id = qemu_get_be32(f);

    ret = qemu_file_get_error(f);
    if (ret) {
        error_report("%s: Failed to read section ID: %d",
                     __func__, ret);
        return ret;
    }

    trace_qemu_loadvm_state_section_partend(section_id);
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->load_section_id == section_id) {
            break;
        }
    }
    if (se == NULL) {
        error_report("Unknown savevm section %d", section_id);
        return -EINVAL;
    }

    ret = vmstate_load(f, se);
    if (ret < 0) {
        error_report("error while loading state section id %d(%s)",
                     section_id, se->idstr);
        return ret;
    }
    if (!check_section_footer(f, se)) {
        return -EINVAL;
    }

    return 0;
}
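/*
 * Validate the stream header: the file magic, the stream version, and
 * (when the source sends one) the leading configuration section.
 */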
static int qemu_loadvm_state_header(QEMUFile *f)
{
    unsigned int v;
    int ret;

    v = qemu_get_be32(f);
    if (v != QEMU_VM_FILE_MAGIC) {
        error_report("Not a migration stream");
        return -EINVAL;
    }

    v = qemu_get_be32(f);
    if (v == QEMU_VM_FILE_VERSION_COMPAT) {
        error_report("SaveVM v2 format is obsolete and doesn't work anymore");
        return -ENOTSUP;
    }
    if (v != QEMU_VM_FILE_VERSION) {
        error_report("Unsupported migration stream version");
        return -ENOTSUP;
    }

    if (migrate_get_current()->send_configuration) {
        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
            error_report("Configuration section missing");
            qemu_loadvm_state_cleanup();
            return -EINVAL;
        }
        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);

        if (ret) {
            qemu_loadvm_state_cleanup();
            return ret;
        }
    }
    return 0;
}
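/*
 * Run the load_setup hook of every active device before any section is
 * parsed, presumably mirroring qemu_savevm_state_setup() on the source
 * side.
 */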
static int qemu_loadvm_state_setup(QEMUFile *f)
{
    SaveStateEntry *se;
    int ret;

    trace_loadvm_state_setup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (!se->ops || !se->ops->load_setup) {
            continue;
        }
        if (se->ops->is_active) {
            if (!se->ops->is_active(se->opaque)) {
                continue;
            }
        }

        ret = se->ops->load_setup(f, se->opaque);
        if (ret < 0) {
            qemu_file_set_error(f, ret);
            error_report("Load state of device %s failed", se->idstr);
            return ret;
        }
    }
    return 0;
}
void qemu_loadvm_state_cleanup(void)
{
    SaveStateEntry *se;

    trace_loadvm_state_cleanup();
    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        if (se->ops && se->ops->load_cleanup) {
            se->ops->load_cleanup(se->opaque);
        }
    }
}
/* Return true if we should continue the migration, or false. */
static bool postcopy_pause_incoming(MigrationIncomingState *mis)
{
    int i;

    trace_postcopy_pause_incoming();

    assert(migrate_postcopy_ram());

    /*
     * Unregistering yank with either the from- or to-src file would work,
     * since the ioc behind them is the same.
     */
    migration_ioc_unregister_yank_from_file(mis->from_src_file);

    assert(mis->from_src_file);
    qemu_file_shutdown(mis->from_src_file);
    qemu_fclose(mis->from_src_file);
    mis->from_src_file = NULL;

    assert(mis->to_src_file);
    qemu_file_shutdown(mis->to_src_file);
    qemu_mutex_lock(&mis->rp_mutex);
    qemu_fclose(mis->to_src_file);
    mis->to_src_file = NULL;
    qemu_mutex_unlock(&mis->rp_mutex);

    /*
     * NOTE: this must happen before resetting the PostcopyTmpPages below;
     * otherwise it is racy to reset those fields while the fast load thread
     * may still be accessing them in parallel.
     */
    if (mis->postcopy_qemufile_dst) {
        qemu_file_shutdown(mis->postcopy_qemufile_dst);
        /* Take the mutex to make sure the fast ram load thread halted */
        qemu_mutex_lock(&mis->postcopy_prio_thread_mutex);
        migration_ioc_unregister_yank_from_file(mis->postcopy_qemufile_dst);
        qemu_fclose(mis->postcopy_qemufile_dst);
        mis->postcopy_qemufile_dst = NULL;
        qemu_mutex_unlock(&mis->postcopy_prio_thread_mutex);
    }

    migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                      MIGRATION_STATUS_POSTCOPY_PAUSED);

    /* Notify the fault thread for the invalidated file handle */
    postcopy_fault_thread_notify(mis);

    /*
     * If the network is interrupted, any temp page we received will be
     * useless because we didn't mark them as "received" in receivedmap.
     * After a proper recovery later (which syncs the src dirty bitmap with
     * receivedmap on dest) these cached small pages will be resent again.
     */
    for (i = 0; i < mis->postcopy_channels; i++) {
        postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]);
    }

    error_report("Detected IO failure for postcopy. "
                 "Migration paused.");

    while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        qemu_sem_wait(&mis->postcopy_pause_sem_dst);
    }

    trace_postcopy_pause_incoming_continued();

    return true;
}
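/*
 * Main incoming loop: dispatch on the section type byte until we hit
 * QEMU_VM_EOF or an error. On failure during an active RAM postcopy the
 * loop does not bail out; it pauses via postcopy_pause_incoming() and
 * retries on the replacement channel.
 */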
int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
    uint8_t section_type;
    int ret = 0;

retry:
    while (true) {
        section_type = qemu_get_byte(f);

        ret = qemu_file_get_error_obj_any(f, mis->postcopy_qemufile_dst, NULL);
        if (ret) {
            break;
        }

        trace_qemu_loadvm_state_section(section_type);
        switch (section_type) {
        case QEMU_VM_SECTION_START:
        case QEMU_VM_SECTION_FULL:
            ret = qemu_loadvm_section_start_full(f, mis);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_SECTION_PART:
        case QEMU_VM_SECTION_END:
            ret = qemu_loadvm_section_part_end(f, mis);
            if (ret < 0) {
                goto out;
            }
            break;
        case QEMU_VM_COMMAND:
            ret = loadvm_process_command(f);
            trace_qemu_loadvm_state_section_command(ret);
            if ((ret < 0) || (ret == LOADVM_QUIT)) {
                goto out;
            }
            break;
        case QEMU_VM_EOF:
            /* This is the end of migration */
            goto out;
        default:
            error_report("Unknown savevm section type %d", section_type);
            ret = -EINVAL;
            goto out;
        }
    }

out:
    if (ret < 0) {
        qemu_file_set_error(f, ret);

        /* Cancel bitmaps incoming regardless of recovery */
        dirty_bitmap_mig_cancel_incoming();

        /*
         * If we are in the middle of an active postcopy, pause instead of
         * bailing out, to at least keep the VM's dirty data. Note that the
         * POSTCOPY_INCOMING_LISTENING stage is not enough: during that stage
         * we are still receiving device state and have not yet started the
         * VM on the destination.
         *
         * Only RAM postcopy supports recovery. Still, if RAM postcopy is
         * enabled, cancelled bitmaps postcopy will not affect RAM postcopy
         * recovery.
         */
        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
            migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
            /* Reset f to point to the newly created channel */
            f = mis->from_src_file;
            goto retry;
        }
    }
    return ret;
}
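/*
 * Load a full VM state from @f: check the header, set up the devices,
 * run the main section loop, then drain the trailing VMDESCRIPTION
 * section (if one is expected) so the sender never sees a premature
 * close.
 */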
int qemu_loadvm_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    int ret;

    if (qemu_savevm_state_blocked(&local_err)) {
        error_report_err(local_err);
        return -EINVAL;
    }

    ret = qemu_loadvm_state_header(f);
    if (ret) {
        return ret;
    }

    if (qemu_loadvm_state_setup(f) != 0) {
        return -EINVAL;
    }

    cpu_synchronize_all_pre_loadvm();

    ret = qemu_loadvm_state_main(f, mis);
    qemu_event_set(&mis->main_thread_load_event);

    trace_qemu_loadvm_state_post_main(ret);

    if (mis->have_listen_thread) {
        /* Listen thread is still going, can't clean up yet */
        return ret;
    }

    if (ret == 0) {
        ret = qemu_file_get_error(f);
    }

    /*
     * Try to read in the VMDESC section as well, so that dumping tools that
     * intercept our migration stream have the chance to see it.
     *
     * We've got to be careful; if we don't read the data and just shut the fd
     * then the sender can error if we close while it's still sending.
     * We also mustn't read data that isn't there; some transports (RDMA)
     * will stall waiting for that data when the source has already closed.
     */
    if (ret == 0 && should_send_vmdesc()) {
        uint8_t *buf;
        uint32_t size;
        uint8_t section_type = qemu_get_byte(f);

        if (section_type != QEMU_VM_VMDESCRIPTION) {
            error_report("Expected vmdescription section, but got %d",
                         section_type);
            /*
             * It doesn't seem worth failing at this point since
             * we apparently have an otherwise valid VM state
             */
        } else {
            buf = g_malloc(0x1000);
            size = qemu_get_be32(f);

            while (size > 0) {
                uint32_t read_chunk = MIN(size, 0x1000);
                qemu_get_buffer(f, buf, read_chunk);
                size -= read_chunk;
            }
            g_free(buf);
        }
    }

    qemu_loadvm_state_cleanup();
    cpu_synchronize_all_post_init();

    return ret;
}
int qemu_load_device_state(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    /* Load QEMU_VM_SECTION_FULL section */
    ret = qemu_loadvm_state_main(f, mis);
    if (ret < 0) {
        error_report("Failed to load device state: %d", ret);
        return ret;
    }

    cpu_synchronize_all_post_init();
    return 0;
}
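/*
 * Create an internal snapshot across all snapshot-capable block devices,
 * storing the VM state into @vmstate (or a default node). Returns true
 * on success; with a NULL @name a tag is generated from the current time.
 */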
bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
                   bool has_devices, strList *devices, Error **errp)
{
    BlockDriverState *bs;
    QEMUSnapshotInfo sn1, *sn = &sn1;
    int ret = -1, ret2;
    QEMUFile *f;
    int saved_vm_running;
    uint64_t vm_state_size;
    g_autoptr(GDateTime) now = g_date_time_new_now_local();
    AioContext *aio_context;

    GLOBAL_STATE_CODE();

    if (migration_is_blocked(errp)) {
        return false;
    }

    if (!replay_can_snapshot()) {
        error_setg(errp, "Record/replay does not allow making a snapshot "
                   "right now. Try again later.");
        return false;
    }

    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }

    /* Delete old snapshots of the same name */
    if (name) {
        if (overwrite) {
            if (bdrv_all_delete_snapshot(name, has_devices,
                                         devices, errp) < 0) {
                return false;
            }
        } else {
            ret2 = bdrv_all_has_snapshot(name, has_devices, devices, errp);
            if (ret2 < 0) {
                return false;
            }
            if (ret2 == 1) {
                error_setg(errp,
                           "Snapshot '%s' already exists in one or more devices",
                           name);
                return false;
            }
        }
    }

    bs = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
    if (bs == NULL) {
        return false;
    }
    aio_context = bdrv_get_aio_context(bs);

    saved_vm_running = runstate_is_running();

    ret = global_state_store();
    if (ret) {
        error_setg(errp, "Error saving global state");
        return false;
    }
    vm_stop(RUN_STATE_SAVE_VM);

    bdrv_drain_all_begin();

    aio_context_acquire(aio_context);

    memset(sn, 0, sizeof(*sn));

    /* fill auxiliary fields */
    sn->date_sec = g_date_time_to_unix(now);
    sn->date_nsec = g_date_time_get_microsecond(now) * 1000;
    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    if (replay_mode != REPLAY_MODE_NONE) {
        sn->icount = replay_get_current_icount();
    } else {
        sn->icount = -1ULL;
    }

    if (name) {
        pstrcpy(sn->name, sizeof(sn->name), name);
    } else {
        g_autofree char *autoname = g_date_time_format(now, "vm-%Y%m%d%H%M%S");
        pstrcpy(sn->name, sizeof(sn->name), autoname);
    }

    /* save the VM state */
    f = qemu_fopen_bdrv(bs, 1);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto the_end;
    }
    ret = qemu_savevm_state(f, errp);
    vm_state_size = qemu_file_total_transferred(f);
    ret2 = qemu_fclose(f);
    if (ret < 0) {
        goto the_end;
    }
    if (ret2 < 0) {
        ret = ret2;
        goto the_end;
    }

    /* The bdrv_all_create_snapshot() call that follows acquires the AioContext
     * for itself.  BDRV_POLL_WHILE() does not support nested locking because
     * it only releases the lock once.  Therefore synchronous I/O will deadlock
     * unless we release the AioContext before bdrv_all_create_snapshot().
     */
    aio_context_release(aio_context);
    aio_context = NULL;

    ret = bdrv_all_create_snapshot(sn, bs, vm_state_size,
                                   has_devices, devices, errp);
    if (ret < 0) {
        bdrv_all_delete_snapshot(sn->name, has_devices, devices, NULL);
        goto the_end;
    }

    ret = 0;

the_end:
    if (aio_context) {
        aio_context_release(aio_context);
    }

    bdrv_drain_all_end();

    if (saved_vm_running) {
        vm_start();
    }
    return ret == 0;
}
void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
                                Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int saved_vm_running;
    int ret;

    if (!has_live) {
        /* live defaults to true so that an old version of the Xen tool
         * stack can still have a successful live migration */
        live = true;
    }

    saved_vm_running = runstate_is_running();
    vm_stop(RUN_STATE_SAVE_VM);
    global_state_store_running();

    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC,
                                    0660, errp);
    if (!ioc) {
        goto the_end;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
    f = qemu_file_new_output(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));
    ret = qemu_save_device_state(f);
    if (ret < 0 || qemu_fclose(f) < 0) {
        error_setg(errp, QERR_IO_ERROR);
    } else {
        /* libxl calls the QMP command "stop" before calling
         * "xen-save-devices-state" and in case of migration failure, libxl
         * would call "cont".
         * So call bdrv_inactivate_all (release locks) here to let the other
         * side of the migration take control of the images.
         */
        if (live && !saved_vm_running) {
            ret = bdrv_inactivate_all();
            if (ret) {
                error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
                           __func__, ret);
            }
        }
    }

the_end:
    if (saved_vm_running) {
        vm_start();
    }
}
void qmp_xen_load_devices_state(const char *filename, Error **errp)
{
    QEMUFile *f;
    QIOChannelFile *ioc;
    int ret;

    /* Guest must be paused before loading the device state; the RAM state
     * will already have been loaded by xc
     */
    if (runstate_is_running()) {
        error_setg(errp, "Cannot update device state while vm is running");
        return;
    }
    vm_stop(RUN_STATE_RESTORE_VM);

    ioc = qio_channel_file_new_path(filename, O_RDONLY | O_BINARY, 0, errp);
    if (!ioc) {
        return;
    }
    qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
    f = qemu_file_new_input(QIO_CHANNEL(ioc));
    object_unref(OBJECT(ioc));

    ret = qemu_loadvm_state(f);
    qemu_fclose(f);
    if (ret < 0) {
        error_setg(errp, QERR_IO_ERROR);
    }
    migration_incoming_state_destroy();
}
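/*
 * Revert all snapshot-capable block devices to snapshot @name and load
 * the saved VM state from it. Callers are expected to have stopped the
 * VM first; returns true on success.
 */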
bool load_snapshot(const char *name, const char *vmstate,
                   bool has_devices, strList *devices, Error **errp)
{
    BlockDriverState *bs_vm_state;
    QEMUSnapshotInfo sn;
    QEMUFile *f;
    int ret;
    AioContext *aio_context;
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }
    ret = bdrv_all_has_snapshot(name, has_devices, devices, errp);
    if (ret < 0) {
        return false;
    }
    if (ret == 0) {
        error_setg(errp, "Snapshot '%s' does not exist in one or more devices",
                   name);
        return false;
    }

    bs_vm_state = bdrv_all_find_vmstate_bs(vmstate, has_devices, devices, errp);
    if (!bs_vm_state) {
        return false;
    }
    aio_context = bdrv_get_aio_context(bs_vm_state);

    /* Don't even try to load empty VM states */
    aio_context_acquire(aio_context);
    ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
    aio_context_release(aio_context);
    if (ret < 0) {
        return false;
    } else if (sn.vm_state_size == 0) {
        error_setg(errp, "This is a disk-only snapshot. Revert to it "
                   "offline using qemu-img");
        return false;
    }

    /*
     * Flush the record/replay queue. Now the VM state is going
     * to change. Therefore we don't need to preserve its consistency.
     */
    replay_flush_events();

    /* Flush all IO requests so they don't interfere with the new state.  */
    bdrv_drain_all_begin();

    ret = bdrv_all_goto_snapshot(name, has_devices, devices, errp);
    if (ret < 0) {
        goto err_drain;
    }

    /* restore the VM state */
    f = qemu_fopen_bdrv(bs_vm_state, 0);
    if (!f) {
        error_setg(errp, "Could not open VM state file");
        goto err_drain;
    }

    qemu_system_reset(SHUTDOWN_CAUSE_SNAPSHOT_LOAD);
    mis->from_src_file = f;

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        ret = -EINVAL;
        goto err_drain;
    }
    aio_context_acquire(aio_context);
    ret = qemu_loadvm_state(f);
    migration_incoming_state_destroy();
    aio_context_release(aio_context);

    bdrv_drain_all_end();

    if (ret < 0) {
        error_setg(errp, "Error %d while loading VM state", ret);
        return false;
    }

    return true;

err_drain:
    bdrv_drain_all_end();
    return false;
}
bool delete_snapshot(const char *name, bool has_devices,
                     strList *devices, Error **errp)
{
    if (!bdrv_all_can_snapshot(has_devices, devices, errp)) {
        return false;
    }

    if (bdrv_all_delete_snapshot(name, has_devices, devices, errp) < 0) {
        return false;
    }

    return true;
}
void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
{
    qemu_ram_set_idstr(mr->ram_block,
                       memory_region_name(mr), dev);
    qemu_ram_set_migratable(mr->ram_block);
}

void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
{
    qemu_ram_unset_idstr(mr->ram_block);
    qemu_ram_unset_migratable(mr->ram_block);
}

void vmstate_register_ram_global(MemoryRegion *mr)
{
    vmstate_register_ram(mr, NULL);
}

bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
{
    /* check needed if --only-migratable is specified */
    if (!only_migratable) {
        return true;
    }

    return !(vmsd && vmsd->unmigratable);
}
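/*
 * Snapshot QMP commands run as jobs: each coroutine below schedules a
 * bottom half in the main AioContext and yields until the bh wakes it
 * with the result, since the snapshot helpers are run outside coroutine
 * context.
 */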
typedef struct SnapshotJob {
    Job common;
    char *tag;
    char *vmstate;
    strList *devices;
    Coroutine *co;
    Error **errp;
    bool ret;
} SnapshotJob;

static void qmp_snapshot_job_free(SnapshotJob *s)
{
    g_free(s->tag);
    g_free(s->vmstate);
    qapi_free_strList(s->devices);
}
static void snapshot_load_job_bh(void *opaque)
{
    Job *job = opaque;
    SnapshotJob *s = container_of(job, SnapshotJob, common);
    int orig_vm_running;

    job_progress_set_remaining(&s->common, 1);

    orig_vm_running = runstate_is_running();
    vm_stop(RUN_STATE_RESTORE_VM);

    s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
    if (s->ret && orig_vm_running) {
        vm_start();
    }

    job_progress_update(&s->common, 1);

    qmp_snapshot_job_free(s);
    aio_co_wake(s->co);
}

static void snapshot_save_job_bh(void *opaque)
{
    Job *job = opaque;
    SnapshotJob *s = container_of(job, SnapshotJob, common);

    job_progress_set_remaining(&s->common, 1);
    s->ret = save_snapshot(s->tag, false, s->vmstate,
                           true, s->devices, s->errp);
    job_progress_update(&s->common, 1);

    qmp_snapshot_job_free(s);
    aio_co_wake(s->co);
}

static void snapshot_delete_job_bh(void *opaque)
{
    Job *job = opaque;
    SnapshotJob *s = container_of(job, SnapshotJob, common);

    job_progress_set_remaining(&s->common, 1);
    s->ret = delete_snapshot(s->tag, true, s->devices, s->errp);
    job_progress_update(&s->common, 1);

    qmp_snapshot_job_free(s);
    aio_co_wake(s->co);
}
static int coroutine_fn snapshot_save_job_run(Job *job, Error **errp)
{
    SnapshotJob *s = container_of(job, SnapshotJob, common);
    s->errp = errp;
    s->co = qemu_coroutine_self();
    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            snapshot_save_job_bh, job);
    qemu_coroutine_yield();
    return s->ret ? 0 : -1;
}

static int coroutine_fn snapshot_load_job_run(Job *job, Error **errp)
{
    SnapshotJob *s = container_of(job, SnapshotJob, common);
    s->errp = errp;
    s->co = qemu_coroutine_self();
    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            snapshot_load_job_bh, job);
    qemu_coroutine_yield();
    return s->ret ? 0 : -1;
}

static int coroutine_fn snapshot_delete_job_run(Job *job, Error **errp)
{
    SnapshotJob *s = container_of(job, SnapshotJob, common);
    s->errp = errp;
    s->co = qemu_coroutine_self();
    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            snapshot_delete_job_bh, job);
    qemu_coroutine_yield();
    return s->ret ? 0 : -1;
}
static const JobDriver snapshot_load_job_driver = {
    .instance_size = sizeof(SnapshotJob),
    .job_type      = JOB_TYPE_SNAPSHOT_LOAD,
    .run           = snapshot_load_job_run,
};

static const JobDriver snapshot_save_job_driver = {
    .instance_size = sizeof(SnapshotJob),
    .job_type      = JOB_TYPE_SNAPSHOT_SAVE,
    .run           = snapshot_save_job_run,
};

static const JobDriver snapshot_delete_job_driver = {
    .instance_size = sizeof(SnapshotJob),
    .job_type      = JOB_TYPE_SNAPSHOT_DELETE,
    .run           = snapshot_delete_job_run,
};
void qmp_snapshot_save(const char *job_id,
                       const char *tag,
                       const char *vmstate,
                       strList *devices,
                       Error **errp)
{
    SnapshotJob *s;

    s = job_create(job_id, &snapshot_save_job_driver, NULL,
                   qemu_get_aio_context(), JOB_MANUAL_DISMISS,
                   NULL, NULL, errp);
    if (!s) {
        return;
    }

    s->tag = g_strdup(tag);
    s->vmstate = g_strdup(vmstate);
    s->devices = QAPI_CLONE(strList, devices);

    job_start(&s->common);
}

void qmp_snapshot_load(const char *job_id,
                       const char *tag,
                       const char *vmstate,
                       strList *devices,
                       Error **errp)
{
    SnapshotJob *s;

    s = job_create(job_id, &snapshot_load_job_driver, NULL,
                   qemu_get_aio_context(), JOB_MANUAL_DISMISS,
                   NULL, NULL, errp);
    if (!s) {
        return;
    }

    s->tag = g_strdup(tag);
    s->vmstate = g_strdup(vmstate);
    s->devices = QAPI_CLONE(strList, devices);

    job_start(&s->common);
}

void qmp_snapshot_delete(const char *job_id,
                         const char *tag,
                         strList *devices,
                         Error **errp)
{
    SnapshotJob *s;

    s = job_create(job_id, &snapshot_delete_job_driver, NULL,
                   qemu_get_aio_context(), JOB_MANUAL_DISMISS,
                   NULL, NULL, errp);
    if (!s) {
        return;
    }

    s->tag = g_strdup(tag);
    s->devices = QAPI_CLONE(strList, devices);

    job_start(&s->common);
}