migration.c 111 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635
  1. /*
  2. * QEMU live migration
  3. *
  4. * Copyright IBM, Corp. 2008
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. * Contributions after 2012-01-13 are licensed under the terms of the
  13. * GNU GPL, version 2 or (at your option) any later version.
  14. */
  15. #include "qemu/osdep.h"
  16. #include "qemu/cutils.h"
  17. #include "qemu/error-report.h"
  18. #include "qemu/main-loop.h"
  19. #include "migration/blocker.h"
  20. #include "exec.h"
  21. #include "fd.h"
  22. #include "socket.h"
  23. #include "sysemu/runstate.h"
  24. #include "sysemu/sysemu.h"
  25. #include "rdma.h"
  26. #include "ram.h"
  27. #include "migration/global_state.h"
  28. #include "migration/misc.h"
  29. #include "migration.h"
  30. #include "savevm.h"
  31. #include "qemu-file-channel.h"
  32. #include "qemu-file.h"
  33. #include "migration/vmstate.h"
  34. #include "block/block.h"
  35. #include "qapi/error.h"
  36. #include "qapi/clone-visitor.h"
  37. #include "qapi/qapi-visit-sockets.h"
  38. #include "qapi/qapi-commands-migration.h"
  39. #include "qapi/qapi-events-migration.h"
  40. #include "qapi/qmp/qerror.h"
  41. #include "qapi/qmp/qnull.h"
  42. #include "qemu/rcu.h"
  43. #include "block.h"
  44. #include "postcopy-ram.h"
  45. #include "qemu/thread.h"
  46. #include "trace.h"
  47. #include "exec/target_page.h"
  48. #include "io/channel-buffer.h"
  49. #include "migration/colo.h"
  50. #include "hw/boards.h"
  51. #include "hw/qdev-properties.h"
  52. #include "monitor/monitor.h"
  53. #include "net/announce.h"
  54. #include "qemu/queue.h"
  /* Migration transfer speed throttling (32 << 20 == 32 * 1024 * 1024) */
  #define MAX_THROTTLE (32 << 20)

  /*
   * Amount of time to allocate to each "chunk" of bandwidth-throttled
   * data.
   */
  #define BUFFER_DELAY 100
  /* How many BUFFER_DELAY chunks fit into 1000 time units (evaluates to 10) */
  #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

  /*
   * Time in milliseconds we are allowed to stop the source
   * for sending the last part.
   */
  #define DEFAULT_MIGRATE_SET_DOWNTIME 300

  /* Maximum migrate downtime: 2000 seconds, also expressed in milliseconds */
  #define MAX_MIGRATE_DOWNTIME_SECONDS 2000
  #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

  /* Default compression thread count */
  #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
  /*
   * Default decompression thread count; usually decompression is at
   * least 4 times as fast as compression.
   */
  #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
  /* 0: no compression, 1: best speed, ... 9: best compression ratio */
  #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1

  /* Default auto-converge CPU throttle migration parameters */
  #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
  #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
  #define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

  /* Migration XBZRLE default cache size */
  #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

  /* The delay time (in ms) between two COLO checkpoints */
  #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
  #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2

  /*
   * Background transfer rate for postcopy; 0 means unlimited.  Note
   * that page requests can still exceed this limit.
   */
  #define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

  /*
   * Parameters for self_announce_delay, giving a stream of RARP/ARP
   * packets after migration.
   */
  #define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50
  #define DEFAULT_MIGRATE_ANNOUNCE_MAX 550
  #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5
  #define DEFAULT_MIGRATE_ANNOUNCE_STEP 100
  94. static NotifierList migration_state_notifiers =
  95. NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
  96. static bool deferred_incoming;
  /*
   * Messages sent on the return path from destination to source.
   * The numeric value is what migrate_send_rp_message() puts on the wire
   * as the 16-bit message type.
   */
  enum mig_rp_message_type {
      MIG_RP_MSG_INVALID      = 0, /* Must be 0 */
      MIG_RP_MSG_SHUT         = 1, /* sibling will not send any more RP messages */
      MIG_RP_MSG_PONG         = 2, /* Response to a PING; data (seq: be32) */
      MIG_RP_MSG_REQ_PAGES_ID = 3, /* data (start: be64, len: be32, id: string) */
      MIG_RP_MSG_REQ_PAGES    = 4, /* data (start: be64, len: be32) */
      MIG_RP_MSG_RECV_BITMAP  = 5, /* send recved_bitmap back to source */
      MIG_RP_MSG_RESUME_ACK   = 6, /* tell source that we are ready to resume */

      MIG_RP_MSG_MAX               /* one past the last valid message type */
  };
  108. /* When we add fault tolerance, we could have several
  109. migrations at once. For now we don't need to add
  110. dynamic creation of migration */
  111. static MigrationState *current_migration;
  112. static MigrationIncomingState *current_incoming;
  113. static bool migration_object_check(MigrationState *ms, Error **errp);
  114. static int migration_maybe_pause(MigrationState *s,
  115. int *current_active_state,
  116. int new_state);
  117. static void migrate_fd_cancel(MigrationState *s);
  118. void migration_object_init(void)
  119. {
  120. MachineState *ms = MACHINE(qdev_get_machine());
  121. Error *err = NULL;
  122. /* This can only be called once. */
  123. assert(!current_migration);
  124. current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
  125. /*
  126. * Init the migrate incoming object as well no matter whether
  127. * we'll use it or not.
  128. */
  129. assert(!current_incoming);
  130. current_incoming = g_new0(MigrationIncomingState, 1);
  131. current_incoming->state = MIGRATION_STATUS_NONE;
  132. current_incoming->postcopy_remote_fds =
  133. g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
  134. qemu_mutex_init(&current_incoming->rp_mutex);
  135. qemu_event_init(&current_incoming->main_thread_load_event, false);
  136. qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
  137. qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
  138. init_dirty_bitmap_incoming_migration();
  139. if (!migration_object_check(current_migration, &err)) {
  140. error_report_err(err);
  141. exit(1);
  142. }
  143. /*
  144. * We cannot really do this in migration_instance_init() since at
  145. * that time global properties are not yet applied, then this
  146. * value will be definitely replaced by something else.
  147. */
  148. if (ms->enforce_config_section) {
  149. current_migration->send_configuration = true;
  150. }
  151. }
  152. void migration_shutdown(void)
  153. {
  154. /*
  155. * Cancel the current migration - that will (eventually)
  156. * stop the migration using this structure
  157. */
  158. migrate_fd_cancel(current_migration);
  159. object_unref(OBJECT(current_migration));
  160. }
  161. /* For outgoing */
  162. MigrationState *migrate_get_current(void)
  163. {
  164. /* This can only be called after the object created. */
  165. assert(current_migration);
  166. return current_migration;
  167. }
  168. MigrationIncomingState *migration_incoming_get_current(void)
  169. {
  170. assert(current_incoming);
  171. return current_incoming;
  172. }
  173. void migration_incoming_state_destroy(void)
  174. {
  175. struct MigrationIncomingState *mis = migration_incoming_get_current();
  176. if (mis->to_src_file) {
  177. /* Tell source that we are done */
  178. migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
  179. qemu_fclose(mis->to_src_file);
  180. mis->to_src_file = NULL;
  181. }
  182. if (mis->from_src_file) {
  183. qemu_fclose(mis->from_src_file);
  184. mis->from_src_file = NULL;
  185. }
  186. if (mis->postcopy_remote_fds) {
  187. g_array_free(mis->postcopy_remote_fds, TRUE);
  188. mis->postcopy_remote_fds = NULL;
  189. }
  190. qemu_event_reset(&mis->main_thread_load_event);
  191. if (mis->socket_address_list) {
  192. qapi_free_SocketAddressList(mis->socket_address_list);
  193. mis->socket_address_list = NULL;
  194. }
  195. }
  /*
   * Emit a QAPI migration event carrying @new_state, but only when
   * migrate_use_events() reports that events are in use.
   */
  static void migrate_generate_event(int new_state)
  {
      if (!migrate_use_events()) {
          return;
      }

      qapi_event_send_migration(new_state);
  }
  202. static bool migrate_late_block_activate(void)
  203. {
  204. MigrationState *s;
  205. s = migrate_get_current();
  206. return s->enabled_capabilities[
  207. MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
  208. }
  209. /*
  210. * Called on -incoming with a defer: uri.
  211. * The migration can be started later after any parameters have been
  212. * changed.
  213. */
  214. static void deferred_incoming_migration(Error **errp)
  215. {
  216. if (deferred_incoming) {
  217. error_setg(errp, "Incoming migration already deferred");
  218. }
  219. deferred_incoming = true;
  220. }
  221. /*
  222. * Send a message on the return channel back to the source
  223. * of the migration.
  224. */
  225. static int migrate_send_rp_message(MigrationIncomingState *mis,
  226. enum mig_rp_message_type message_type,
  227. uint16_t len, void *data)
  228. {
  229. int ret = 0;
  230. trace_migrate_send_rp_message((int)message_type, len);
  231. qemu_mutex_lock(&mis->rp_mutex);
  232. /*
  233. * It's possible that the file handle got lost due to network
  234. * failures.
  235. */
  236. if (!mis->to_src_file) {
  237. ret = -EIO;
  238. goto error;
  239. }
  240. qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
  241. qemu_put_be16(mis->to_src_file, len);
  242. qemu_put_buffer(mis->to_src_file, data, len);
  243. qemu_fflush(mis->to_src_file);
  244. /* It's possible that qemu file got error during sending */
  245. ret = qemu_file_get_error(mis->to_src_file);
  246. error:
  247. qemu_mutex_unlock(&mis->rp_mutex);
  248. return ret;
  249. }
  250. /* Request a range of pages from the source VM at the given
  251. * start address.
  252. * rbname: Name of the RAMBlock to request the page in, if NULL it's the same
  253. * as the last request (a name must have been given previously)
  254. * Start: Address offset within the RB
  255. * Len: Length in bytes required - must be a multiple of pagesize
  256. */
  257. int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
  258. ram_addr_t start, size_t len)
  259. {
  260. uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
  261. size_t msglen = 12; /* start + len */
  262. enum mig_rp_message_type msg_type;
  263. *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
  264. *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
  265. if (rbname) {
  266. int rbname_len = strlen(rbname);
  267. assert(rbname_len < 256);
  268. bufc[msglen++] = rbname_len;
  269. memcpy(bufc + msglen, rbname, rbname_len);
  270. msglen += rbname_len;
  271. msg_type = MIG_RP_MSG_REQ_PAGES_ID;
  272. } else {
  273. msg_type = MIG_RP_MSG_REQ_PAGES;
  274. }
  275. return migrate_send_rp_message(mis, msg_type, msglen, bufc);
  276. }
  /*
   * Whether COLO is enabled for the incoming migration.  Only read and
   * written through the three accessors below.
   */
  static bool migration_colo_enabled;

  /* Return the current value of the incoming COLO flag. */
  bool migration_incoming_colo_enabled(void)
  {
      return migration_colo_enabled;
  }

  /* Clear the incoming COLO flag. */
  void migration_incoming_disable_colo(void)
  {
      migration_colo_enabled = false;
  }

  /* Set the incoming COLO flag. */
  void migration_incoming_enable_colo(void)
  {
      migration_colo_enabled = true;
  }
  290. void migrate_add_address(SocketAddress *address)
  291. {
  292. MigrationIncomingState *mis = migration_incoming_get_current();
  293. SocketAddressList *addrs;
  294. addrs = g_new0(SocketAddressList, 1);
  295. addrs->next = mis->socket_address_list;
  296. mis->socket_address_list = addrs;
  297. addrs->value = QAPI_CLONE(SocketAddress, address);
  298. }
  299. void qemu_start_incoming_migration(const char *uri, Error **errp)
  300. {
  301. const char *p;
  302. qapi_event_send_migration(MIGRATION_STATUS_SETUP);
  303. if (!strcmp(uri, "defer")) {
  304. deferred_incoming_migration(errp);
  305. } else if (strstart(uri, "tcp:", &p)) {
  306. tcp_start_incoming_migration(p, errp);
  307. #ifdef CONFIG_RDMA
  308. } else if (strstart(uri, "rdma:", &p)) {
  309. rdma_start_incoming_migration(p, errp);
  310. #endif
  311. } else if (strstart(uri, "exec:", &p)) {
  312. exec_start_incoming_migration(p, errp);
  313. } else if (strstart(uri, "unix:", &p)) {
  314. unix_start_incoming_migration(p, errp);
  315. } else if (strstart(uri, "fd:", &p)) {
  316. fd_start_incoming_migration(p, errp);
  317. } else {
  318. error_setg(errp, "unknown migration protocol: %s", uri);
  319. }
  320. }
  321. static void process_incoming_migration_bh(void *opaque)
  322. {
  323. Error *local_err = NULL;
  324. MigrationIncomingState *mis = opaque;
  325. /* If capability late_block_activate is set:
  326. * Only fire up the block code now if we're going to restart the
  327. * VM, else 'cont' will do it.
  328. * This causes file locking to happen; so we don't want it to happen
  329. * unless we really are starting the VM.
  330. */
  331. if (!migrate_late_block_activate() ||
  332. (autostart && (!global_state_received() ||
  333. global_state_get_runstate() == RUN_STATE_RUNNING))) {
  334. /* Make sure all file formats flush their mutable metadata.
  335. * If we get an error here, just don't restart the VM yet. */
  336. bdrv_invalidate_cache_all(&local_err);
  337. if (local_err) {
  338. error_report_err(local_err);
  339. local_err = NULL;
  340. autostart = false;
  341. }
  342. }
  343. /*
  344. * This must happen after all error conditions are dealt with and
  345. * we're sure the VM is going to be running on this host.
  346. */
  347. qemu_announce_self(&mis->announce_timer, migrate_announce_params());
  348. if (multifd_load_cleanup(&local_err) != 0) {
  349. error_report_err(local_err);
  350. autostart = false;
  351. }
  352. /* If global state section was not received or we are in running
  353. state, we need to obey autostart. Any other state is set with
  354. runstate_set. */
  355. dirty_bitmap_mig_before_vm_start();
  356. if (!global_state_received() ||
  357. global_state_get_runstate() == RUN_STATE_RUNNING) {
  358. if (autostart) {
  359. vm_start();
  360. } else {
  361. runstate_set(RUN_STATE_PAUSED);
  362. }
  363. } else if (migration_incoming_colo_enabled()) {
  364. migration_incoming_disable_colo();
  365. vm_start();
  366. } else {
  367. runstate_set(global_state_get_runstate());
  368. }
  369. /*
  370. * This must happen after any state changes since as soon as an external
  371. * observer sees this event they might start to prod at the VM assuming
  372. * it's ready to use.
  373. */
  374. migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
  375. MIGRATION_STATUS_COMPLETED);
  376. qemu_bh_delete(mis->bh);
  377. migration_incoming_state_destroy();
  378. }
  379. static void process_incoming_migration_co(void *opaque)
  380. {
  381. MigrationIncomingState *mis = migration_incoming_get_current();
  382. PostcopyState ps;
  383. int ret;
  384. Error *local_err = NULL;
  385. assert(mis->from_src_file);
  386. mis->migration_incoming_co = qemu_coroutine_self();
  387. mis->largest_page_size = qemu_ram_pagesize_largest();
  388. postcopy_state_set(POSTCOPY_INCOMING_NONE);
  389. migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
  390. MIGRATION_STATUS_ACTIVE);
  391. ret = qemu_loadvm_state(mis->from_src_file);
  392. ps = postcopy_state_get();
  393. trace_process_incoming_migration_co_end(ret, ps);
  394. if (ps != POSTCOPY_INCOMING_NONE) {
  395. if (ps == POSTCOPY_INCOMING_ADVISE) {
  396. /*
  397. * Where a migration had postcopy enabled (and thus went to advise)
  398. * but managed to complete within the precopy period, we can use
  399. * the normal exit.
  400. */
  401. postcopy_ram_incoming_cleanup(mis);
  402. } else if (ret >= 0) {
  403. /*
  404. * Postcopy was started, cleanup should happen at the end of the
  405. * postcopy thread.
  406. */
  407. trace_process_incoming_migration_co_postcopy_end_main();
  408. return;
  409. }
  410. /* Else if something went wrong then just fall out of the normal exit */
  411. }
  412. /* we get COLO info, and know if we are in COLO mode */
  413. if (!ret && migration_incoming_colo_enabled()) {
  414. /* Make sure all file formats flush their mutable metadata */
  415. bdrv_invalidate_cache_all(&local_err);
  416. if (local_err) {
  417. error_report_err(local_err);
  418. goto fail;
  419. }
  420. if (colo_init_ram_cache() < 0) {
  421. error_report("Init ram cache failed");
  422. goto fail;
  423. }
  424. qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
  425. colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
  426. mis->have_colo_incoming_thread = true;
  427. qemu_coroutine_yield();
  428. /* Wait checkpoint incoming thread exit before free resource */
  429. qemu_thread_join(&mis->colo_incoming_thread);
  430. /* We hold the global iothread lock, so it is safe here */
  431. colo_release_ram_cache();
  432. }
  433. if (ret < 0) {
  434. error_report("load of migration failed: %s", strerror(-ret));
  435. goto fail;
  436. }
  437. mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
  438. qemu_bh_schedule(mis->bh);
  439. mis->migration_incoming_co = NULL;
  440. return;
  441. fail:
  442. local_err = NULL;
  443. migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
  444. MIGRATION_STATUS_FAILED);
  445. qemu_fclose(mis->from_src_file);
  446. if (multifd_load_cleanup(&local_err) != 0) {
  447. error_report_err(local_err);
  448. }
  449. exit(EXIT_FAILURE);
  450. }
  451. static void migration_incoming_setup(QEMUFile *f)
  452. {
  453. MigrationIncomingState *mis = migration_incoming_get_current();
  454. if (multifd_load_setup() != 0) {
  455. /* We haven't been able to create multifd threads
  456. nothing better to do */
  457. exit(EXIT_FAILURE);
  458. }
  459. if (!mis->from_src_file) {
  460. mis->from_src_file = f;
  461. }
  462. qemu_file_set_blocking(f, false);
  463. }
  464. void migration_incoming_process(void)
  465. {
  466. Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
  467. qemu_coroutine_enter(co);
  468. }
  469. /* Returns true if recovered from a paused migration, otherwise false */
  470. static bool postcopy_try_recover(QEMUFile *f)
  471. {
  472. MigrationIncomingState *mis = migration_incoming_get_current();
  473. if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
  474. /* Resumed from a paused postcopy migration */
  475. mis->from_src_file = f;
  476. /* Postcopy has standalone thread to do vm load */
  477. qemu_file_set_blocking(f, true);
  478. /* Re-configure the return path */
  479. mis->to_src_file = qemu_file_get_return_path(f);
  480. migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
  481. MIGRATION_STATUS_POSTCOPY_RECOVER);
  482. /*
  483. * Here, we only wake up the main loading thread (while the
  484. * fault thread will still be waiting), so that we can receive
  485. * commands from source now, and answer it if needed. The
  486. * fault thread will be woken up afterwards until we are sure
  487. * that source is ready to reply to page requests.
  488. */
  489. qemu_sem_post(&mis->postcopy_pause_sem_dst);
  490. return true;
  491. }
  492. return false;
  493. }
  494. void migration_fd_process_incoming(QEMUFile *f)
  495. {
  496. if (postcopy_try_recover(f)) {
  497. return;
  498. }
  499. migration_incoming_setup(f);
  500. migration_incoming_process();
  501. }
  502. void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
  503. {
  504. MigrationIncomingState *mis = migration_incoming_get_current();
  505. bool start_migration;
  506. if (!mis->from_src_file) {
  507. /* The first connection (multifd may have multiple) */
  508. QEMUFile *f = qemu_fopen_channel_input(ioc);
  509. /* If it's a recovery, we're done */
  510. if (postcopy_try_recover(f)) {
  511. return;
  512. }
  513. migration_incoming_setup(f);
  514. /*
  515. * Common migration only needs one channel, so we can start
  516. * right now. Multifd needs more than one channel, we wait.
  517. */
  518. start_migration = !migrate_use_multifd();
  519. } else {
  520. Error *local_err = NULL;
  521. /* Multiple connections */
  522. assert(migrate_use_multifd());
  523. start_migration = multifd_recv_new_channel(ioc, &local_err);
  524. if (local_err) {
  525. error_propagate(errp, local_err);
  526. return;
  527. }
  528. }
  529. if (start_migration) {
  530. migration_incoming_process();
  531. }
  532. }
  533. /**
  534. * @migration_has_all_channels: We have received all channels that we need
  535. *
  536. * Returns true when we have got connections to all the channels that
  537. * we need for migration.
  538. */
  539. bool migration_has_all_channels(void)
  540. {
  541. MigrationIncomingState *mis = migration_incoming_get_current();
  542. bool all_channels;
  543. all_channels = multifd_recv_all_channels_created();
  544. return all_channels && mis->from_src_file != NULL;
  545. }
  546. /*
  547. * Send a 'SHUT' message on the return channel with the given value
  548. * to indicate that we've finished with the RP. Non-0 value indicates
  549. * error.
  550. */
  551. void migrate_send_rp_shut(MigrationIncomingState *mis,
  552. uint32_t value)
  553. {
  554. uint32_t buf;
  555. buf = cpu_to_be32(value);
  556. migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
  557. }
  558. /*
  559. * Send a 'PONG' message on the return channel with the given value
  560. * (normally in response to a 'PING')
  561. */
  562. void migrate_send_rp_pong(MigrationIncomingState *mis,
  563. uint32_t value)
  564. {
  565. uint32_t buf;
  566. buf = cpu_to_be32(value);
  567. migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
  568. }
  569. void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
  570. char *block_name)
  571. {
  572. char buf[512];
  573. int len;
  574. int64_t res;
  575. /*
  576. * First, we send the header part. It contains only the len of
  577. * idstr, and the idstr itself.
  578. */
  579. len = strlen(block_name);
  580. buf[0] = len;
  581. memcpy(buf + 1, block_name, len);
  582. if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
  583. error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
  584. __func__);
  585. return;
  586. }
  587. migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);
  588. /*
  589. * Next, we dump the received bitmap to the stream.
  590. *
  591. * TODO: currently we are safe since we are the only one that is
  592. * using the to_src_file handle (fault thread is still paused),
  593. * and it's ok even not taking the mutex. However the best way is
  594. * to take the lock before sending the message header, and release
  595. * the lock after sending the bitmap.
  596. */
  597. qemu_mutex_lock(&mis->rp_mutex);
  598. res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
  599. qemu_mutex_unlock(&mis->rp_mutex);
  600. trace_migrate_send_rp_recv_bitmap(block_name, res);
  601. }
  602. void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
  603. {
  604. uint32_t buf;
  605. buf = cpu_to_be32(value);
  606. migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
  607. }
  608. MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
  609. {
  610. MigrationCapabilityStatusList *head = NULL;
  611. MigrationCapabilityStatusList *caps;
  612. MigrationState *s = migrate_get_current();
  613. int i;
  614. caps = NULL; /* silence compiler warning */
  615. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  616. #ifndef CONFIG_LIVE_BLOCK_MIGRATION
  617. if (i == MIGRATION_CAPABILITY_BLOCK) {
  618. continue;
  619. }
  620. #endif
  621. if (head == NULL) {
  622. head = g_malloc0(sizeof(*caps));
  623. caps = head;
  624. } else {
  625. caps->next = g_malloc0(sizeof(*caps));
  626. caps = caps->next;
  627. }
  628. caps->value =
  629. g_malloc(sizeof(*caps->value));
  630. caps->value->capability = i;
  631. caps->value->state = s->enabled_capabilities[i];
  632. }
  633. return head;
  634. }
  635. MigrationParameters *qmp_query_migrate_parameters(Error **errp)
  636. {
  637. MigrationParameters *params;
  638. MigrationState *s = migrate_get_current();
  639. /* TODO use QAPI_CLONE() instead of duplicating it inline */
  640. params = g_malloc0(sizeof(*params));
  641. params->has_compress_level = true;
  642. params->compress_level = s->parameters.compress_level;
  643. params->has_compress_threads = true;
  644. params->compress_threads = s->parameters.compress_threads;
  645. params->has_compress_wait_thread = true;
  646. params->compress_wait_thread = s->parameters.compress_wait_thread;
  647. params->has_decompress_threads = true;
  648. params->decompress_threads = s->parameters.decompress_threads;
  649. params->has_cpu_throttle_initial = true;
  650. params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
  651. params->has_cpu_throttle_increment = true;
  652. params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
  653. params->has_tls_creds = true;
  654. params->tls_creds = g_strdup(s->parameters.tls_creds);
  655. params->has_tls_hostname = true;
  656. params->tls_hostname = g_strdup(s->parameters.tls_hostname);
  657. params->has_tls_authz = true;
  658. params->tls_authz = g_strdup(s->parameters.tls_authz);
  659. params->has_max_bandwidth = true;
  660. params->max_bandwidth = s->parameters.max_bandwidth;
  661. params->has_downtime_limit = true;
  662. params->downtime_limit = s->parameters.downtime_limit;
  663. params->has_x_checkpoint_delay = true;
  664. params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
  665. params->has_block_incremental = true;
  666. params->block_incremental = s->parameters.block_incremental;
  667. params->has_multifd_channels = true;
  668. params->multifd_channels = s->parameters.multifd_channels;
  669. params->has_xbzrle_cache_size = true;
  670. params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
  671. params->has_max_postcopy_bandwidth = true;
  672. params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
  673. params->has_max_cpu_throttle = true;
  674. params->max_cpu_throttle = s->parameters.max_cpu_throttle;
  675. params->has_announce_initial = true;
  676. params->announce_initial = s->parameters.announce_initial;
  677. params->has_announce_max = true;
  678. params->announce_max = s->parameters.announce_max;
  679. params->has_announce_rounds = true;
  680. params->announce_rounds = s->parameters.announce_rounds;
  681. params->has_announce_step = true;
  682. params->announce_step = s->parameters.announce_step;
  683. return params;
  684. }
  685. AnnounceParameters *migrate_announce_params(void)
  686. {
  687. static AnnounceParameters ap;
  688. MigrationState *s = migrate_get_current();
  689. ap.initial = s->parameters.announce_initial;
  690. ap.max = s->parameters.announce_max;
  691. ap.rounds = s->parameters.announce_rounds;
  692. ap.step = s->parameters.announce_step;
  693. return &ap;
  694. }
  695. /*
  696. * Return true if we're already in the middle of a migration
  697. * (i.e. any of the active or setup states)
  698. */
  699. bool migration_is_setup_or_active(int state)
  700. {
  701. switch (state) {
  702. case MIGRATION_STATUS_ACTIVE:
  703. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  704. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  705. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  706. case MIGRATION_STATUS_SETUP:
  707. case MIGRATION_STATUS_PRE_SWITCHOVER:
  708. case MIGRATION_STATUS_DEVICE:
  709. case MIGRATION_STATUS_WAIT_UNPLUG:
  710. return true;
  711. default:
  712. return false;
  713. }
  714. }
  715. static void populate_time_info(MigrationInfo *info, MigrationState *s)
  716. {
  717. info->has_status = true;
  718. info->has_setup_time = true;
  719. info->setup_time = s->setup_time;
  720. if (s->state == MIGRATION_STATUS_COMPLETED) {
  721. info->has_total_time = true;
  722. info->total_time = s->total_time;
  723. info->has_downtime = true;
  724. info->downtime = s->downtime;
  725. } else {
  726. info->has_total_time = true;
  727. info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
  728. s->start_time;
  729. info->has_expected_downtime = true;
  730. info->expected_downtime = s->expected_downtime;
  731. }
  732. }
  733. static void populate_ram_info(MigrationInfo *info, MigrationState *s)
  734. {
  735. info->has_ram = true;
  736. info->ram = g_malloc0(sizeof(*info->ram));
  737. info->ram->transferred = ram_counters.transferred;
  738. info->ram->total = ram_bytes_total();
  739. info->ram->duplicate = ram_counters.duplicate;
  740. /* legacy value. It is not used anymore */
  741. info->ram->skipped = 0;
  742. info->ram->normal = ram_counters.normal;
  743. info->ram->normal_bytes = ram_counters.normal *
  744. qemu_target_page_size();
  745. info->ram->mbps = s->mbps;
  746. info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
  747. info->ram->postcopy_requests = ram_counters.postcopy_requests;
  748. info->ram->page_size = qemu_target_page_size();
  749. info->ram->multifd_bytes = ram_counters.multifd_bytes;
  750. info->ram->pages_per_second = s->pages_per_second;
  751. if (migrate_use_xbzrle()) {
  752. info->has_xbzrle_cache = true;
  753. info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
  754. info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
  755. info->xbzrle_cache->bytes = xbzrle_counters.bytes;
  756. info->xbzrle_cache->pages = xbzrle_counters.pages;
  757. info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
  758. info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
  759. info->xbzrle_cache->overflow = xbzrle_counters.overflow;
  760. }
  761. if (migrate_use_compression()) {
  762. info->has_compression = true;
  763. info->compression = g_malloc0(sizeof(*info->compression));
  764. info->compression->pages = compression_counters.pages;
  765. info->compression->busy = compression_counters.busy;
  766. info->compression->busy_rate = compression_counters.busy_rate;
  767. info->compression->compressed_size =
  768. compression_counters.compressed_size;
  769. info->compression->compression_rate =
  770. compression_counters.compression_rate;
  771. }
  772. if (cpu_throttle_active()) {
  773. info->has_cpu_throttle_percentage = true;
  774. info->cpu_throttle_percentage = cpu_throttle_get_percentage();
  775. }
  776. if (s->state != MIGRATION_STATUS_COMPLETED) {
  777. info->ram->remaining = ram_bytes_remaining();
  778. info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
  779. }
  780. }
  781. static void populate_disk_info(MigrationInfo *info)
  782. {
  783. if (blk_mig_active()) {
  784. info->has_disk = true;
  785. info->disk = g_malloc0(sizeof(*info->disk));
  786. info->disk->transferred = blk_mig_bytes_transferred();
  787. info->disk->remaining = blk_mig_bytes_remaining();
  788. info->disk->total = blk_mig_bytes_total();
  789. }
  790. }
  791. static void fill_source_migration_info(MigrationInfo *info)
  792. {
  793. MigrationState *s = migrate_get_current();
  794. switch (s->state) {
  795. case MIGRATION_STATUS_NONE:
  796. /* no migration has happened ever */
  797. /* do not overwrite destination migration status */
  798. return;
  799. break;
  800. case MIGRATION_STATUS_SETUP:
  801. info->has_status = true;
  802. info->has_total_time = false;
  803. break;
  804. case MIGRATION_STATUS_ACTIVE:
  805. case MIGRATION_STATUS_CANCELLING:
  806. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  807. case MIGRATION_STATUS_PRE_SWITCHOVER:
  808. case MIGRATION_STATUS_DEVICE:
  809. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  810. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  811. /* TODO add some postcopy stats */
  812. populate_time_info(info, s);
  813. populate_ram_info(info, s);
  814. populate_disk_info(info);
  815. break;
  816. case MIGRATION_STATUS_COLO:
  817. info->has_status = true;
  818. /* TODO: display COLO specific information (checkpoint info etc.) */
  819. break;
  820. case MIGRATION_STATUS_COMPLETED:
  821. populate_time_info(info, s);
  822. populate_ram_info(info, s);
  823. break;
  824. case MIGRATION_STATUS_FAILED:
  825. info->has_status = true;
  826. if (s->error) {
  827. info->has_error_desc = true;
  828. info->error_desc = g_strdup(error_get_pretty(s->error));
  829. }
  830. break;
  831. case MIGRATION_STATUS_CANCELLED:
  832. info->has_status = true;
  833. break;
  834. case MIGRATION_STATUS_WAIT_UNPLUG:
  835. info->has_status = true;
  836. break;
  837. }
  838. info->status = s->state;
  839. }
  840. /**
  841. * @migration_caps_check - check capability validity
  842. *
  843. * @cap_list: old capability list, array of bool
  844. * @params: new capabilities to be applied soon
  845. * @errp: set *errp if the check failed, with reason
  846. *
  847. * Returns true if check passed, otherwise false.
  848. */
  849. static bool migrate_caps_check(bool *cap_list,
  850. MigrationCapabilityStatusList *params,
  851. Error **errp)
  852. {
  853. MigrationCapabilityStatusList *cap;
  854. bool old_postcopy_cap;
  855. MigrationIncomingState *mis = migration_incoming_get_current();
  856. old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
  857. for (cap = params; cap; cap = cap->next) {
  858. cap_list[cap->value->capability] = cap->value->state;
  859. }
  860. #ifndef CONFIG_LIVE_BLOCK_MIGRATION
  861. if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
  862. error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
  863. "block migration");
  864. error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
  865. return false;
  866. }
  867. #endif
  868. #ifndef CONFIG_REPLICATION
  869. if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
  870. error_setg(errp, "QEMU compiled without replication module"
  871. " can't enable COLO");
  872. error_append_hint(errp, "Please enable replication before COLO.\n");
  873. return false;
  874. }
  875. #endif
  876. if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
  877. if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
  878. /* The decompression threads asynchronously write into RAM
  879. * rather than use the atomic copies needed to avoid
  880. * userfaulting. It should be possible to fix the decompression
  881. * threads for compatibility in future.
  882. */
  883. error_setg(errp, "Postcopy is not currently compatible "
  884. "with compression");
  885. return false;
  886. }
  887. /* This check is reasonably expensive, so only when it's being
  888. * set the first time, also it's only the destination that needs
  889. * special support.
  890. */
  891. if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
  892. !postcopy_ram_supported_by_host(mis)) {
  893. /* postcopy_ram_supported_by_host will have emitted a more
  894. * detailed message
  895. */
  896. error_setg(errp, "Postcopy is not supported");
  897. return false;
  898. }
  899. if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
  900. error_setg(errp, "Postcopy is not compatible with ignore-shared");
  901. return false;
  902. }
  903. }
  904. return true;
  905. }
  906. static void fill_destination_migration_info(MigrationInfo *info)
  907. {
  908. MigrationIncomingState *mis = migration_incoming_get_current();
  909. if (mis->socket_address_list) {
  910. info->has_socket_address = true;
  911. info->socket_address =
  912. QAPI_CLONE(SocketAddressList, mis->socket_address_list);
  913. }
  914. switch (mis->state) {
  915. case MIGRATION_STATUS_NONE:
  916. return;
  917. break;
  918. case MIGRATION_STATUS_SETUP:
  919. case MIGRATION_STATUS_CANCELLING:
  920. case MIGRATION_STATUS_CANCELLED:
  921. case MIGRATION_STATUS_ACTIVE:
  922. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  923. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  924. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  925. case MIGRATION_STATUS_FAILED:
  926. case MIGRATION_STATUS_COLO:
  927. info->has_status = true;
  928. break;
  929. case MIGRATION_STATUS_COMPLETED:
  930. info->has_status = true;
  931. fill_destination_postcopy_migration_info(info);
  932. break;
  933. }
  934. info->status = mis->state;
  935. }
  936. MigrationInfo *qmp_query_migrate(Error **errp)
  937. {
  938. MigrationInfo *info = g_malloc0(sizeof(*info));
  939. fill_destination_migration_info(info);
  940. fill_source_migration_info(info);
  941. return info;
  942. }
  943. void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
  944. Error **errp)
  945. {
  946. MigrationState *s = migrate_get_current();
  947. MigrationCapabilityStatusList *cap;
  948. bool cap_list[MIGRATION_CAPABILITY__MAX];
  949. if (migration_is_setup_or_active(s->state)) {
  950. error_setg(errp, QERR_MIGRATION_ACTIVE);
  951. return;
  952. }
  953. memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
  954. if (!migrate_caps_check(cap_list, params, errp)) {
  955. return;
  956. }
  957. for (cap = params; cap; cap = cap->next) {
  958. s->enabled_capabilities[cap->value->capability] = cap->value->state;
  959. }
  960. }
  961. /*
  962. * Check whether the parameters are valid. Error will be put into errp
  963. * (if provided). Return true if valid, otherwise false.
  964. */
  965. static bool migrate_params_check(MigrationParameters *params, Error **errp)
  966. {
  967. if (params->has_compress_level &&
  968. (params->compress_level > 9)) {
  969. error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
  970. "is invalid, it should be in the range of 0 to 9");
  971. return false;
  972. }
  973. if (params->has_compress_threads && (params->compress_threads < 1)) {
  974. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  975. "compress_threads",
  976. "is invalid, it should be in the range of 1 to 255");
  977. return false;
  978. }
  979. if (params->has_decompress_threads && (params->decompress_threads < 1)) {
  980. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  981. "decompress_threads",
  982. "is invalid, it should be in the range of 1 to 255");
  983. return false;
  984. }
  985. if (params->has_cpu_throttle_initial &&
  986. (params->cpu_throttle_initial < 1 ||
  987. params->cpu_throttle_initial > 99)) {
  988. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  989. "cpu_throttle_initial",
  990. "an integer in the range of 1 to 99");
  991. return false;
  992. }
  993. if (params->has_cpu_throttle_increment &&
  994. (params->cpu_throttle_increment < 1 ||
  995. params->cpu_throttle_increment > 99)) {
  996. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  997. "cpu_throttle_increment",
  998. "an integer in the range of 1 to 99");
  999. return false;
  1000. }
  1001. if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
  1002. error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
  1003. " range of 0 to %zu bytes/second", SIZE_MAX);
  1004. return false;
  1005. }
  1006. if (params->has_downtime_limit &&
  1007. (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
  1008. error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
  1009. "the range of 0 to %d milliseconds",
  1010. MAX_MIGRATE_DOWNTIME);
  1011. return false;
  1012. }
  1013. /* x_checkpoint_delay is now always positive */
  1014. if (params->has_multifd_channels && (params->multifd_channels < 1)) {
  1015. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1016. "multifd_channels",
  1017. "is invalid, it should be in the range of 1 to 255");
  1018. return false;
  1019. }
  1020. if (params->has_xbzrle_cache_size &&
  1021. (params->xbzrle_cache_size < qemu_target_page_size() ||
  1022. !is_power_of_2(params->xbzrle_cache_size))) {
  1023. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1024. "xbzrle_cache_size",
  1025. "is invalid, it should be bigger than target page size"
  1026. " and a power of two");
  1027. return false;
  1028. }
  1029. if (params->has_max_cpu_throttle &&
  1030. (params->max_cpu_throttle < params->cpu_throttle_initial ||
  1031. params->max_cpu_throttle > 99)) {
  1032. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1033. "max_cpu_throttle",
  1034. "an integer in the range of cpu_throttle_initial to 99");
  1035. return false;
  1036. }
  1037. if (params->has_announce_initial &&
  1038. params->announce_initial > 100000) {
  1039. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1040. "announce_initial",
  1041. "is invalid, it must be less than 100000 ms");
  1042. return false;
  1043. }
  1044. if (params->has_announce_max &&
  1045. params->announce_max > 100000) {
  1046. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1047. "announce_max",
  1048. "is invalid, it must be less than 100000 ms");
  1049. return false;
  1050. }
  1051. if (params->has_announce_rounds &&
  1052. params->announce_rounds > 1000) {
  1053. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1054. "announce_rounds",
  1055. "is invalid, it must be in the range of 0 to 1000");
  1056. return false;
  1057. }
  1058. if (params->has_announce_step &&
  1059. (params->announce_step < 1 ||
  1060. params->announce_step > 10000)) {
  1061. error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
  1062. "announce_step",
  1063. "is invalid, it must be in the range of 1 to 10000 ms");
  1064. return false;
  1065. }
  1066. return true;
  1067. }
  1068. static void migrate_params_test_apply(MigrateSetParameters *params,
  1069. MigrationParameters *dest)
  1070. {
  1071. *dest = migrate_get_current()->parameters;
  1072. /* TODO use QAPI_CLONE() instead of duplicating it inline */
  1073. if (params->has_compress_level) {
  1074. dest->compress_level = params->compress_level;
  1075. }
  1076. if (params->has_compress_threads) {
  1077. dest->compress_threads = params->compress_threads;
  1078. }
  1079. if (params->has_compress_wait_thread) {
  1080. dest->compress_wait_thread = params->compress_wait_thread;
  1081. }
  1082. if (params->has_decompress_threads) {
  1083. dest->decompress_threads = params->decompress_threads;
  1084. }
  1085. if (params->has_cpu_throttle_initial) {
  1086. dest->cpu_throttle_initial = params->cpu_throttle_initial;
  1087. }
  1088. if (params->has_cpu_throttle_increment) {
  1089. dest->cpu_throttle_increment = params->cpu_throttle_increment;
  1090. }
  1091. if (params->has_tls_creds) {
  1092. assert(params->tls_creds->type == QTYPE_QSTRING);
  1093. dest->tls_creds = g_strdup(params->tls_creds->u.s);
  1094. }
  1095. if (params->has_tls_hostname) {
  1096. assert(params->tls_hostname->type == QTYPE_QSTRING);
  1097. dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
  1098. }
  1099. if (params->has_max_bandwidth) {
  1100. dest->max_bandwidth = params->max_bandwidth;
  1101. }
  1102. if (params->has_downtime_limit) {
  1103. dest->downtime_limit = params->downtime_limit;
  1104. }
  1105. if (params->has_x_checkpoint_delay) {
  1106. dest->x_checkpoint_delay = params->x_checkpoint_delay;
  1107. }
  1108. if (params->has_block_incremental) {
  1109. dest->block_incremental = params->block_incremental;
  1110. }
  1111. if (params->has_multifd_channels) {
  1112. dest->multifd_channels = params->multifd_channels;
  1113. }
  1114. if (params->has_xbzrle_cache_size) {
  1115. dest->xbzrle_cache_size = params->xbzrle_cache_size;
  1116. }
  1117. if (params->has_max_postcopy_bandwidth) {
  1118. dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
  1119. }
  1120. if (params->has_max_cpu_throttle) {
  1121. dest->max_cpu_throttle = params->max_cpu_throttle;
  1122. }
  1123. if (params->has_announce_initial) {
  1124. dest->announce_initial = params->announce_initial;
  1125. }
  1126. if (params->has_announce_max) {
  1127. dest->announce_max = params->announce_max;
  1128. }
  1129. if (params->has_announce_rounds) {
  1130. dest->announce_rounds = params->announce_rounds;
  1131. }
  1132. if (params->has_announce_step) {
  1133. dest->announce_step = params->announce_step;
  1134. }
  1135. }
  1136. static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
  1137. {
  1138. MigrationState *s = migrate_get_current();
  1139. /* TODO use QAPI_CLONE() instead of duplicating it inline */
  1140. if (params->has_compress_level) {
  1141. s->parameters.compress_level = params->compress_level;
  1142. }
  1143. if (params->has_compress_threads) {
  1144. s->parameters.compress_threads = params->compress_threads;
  1145. }
  1146. if (params->has_compress_wait_thread) {
  1147. s->parameters.compress_wait_thread = params->compress_wait_thread;
  1148. }
  1149. if (params->has_decompress_threads) {
  1150. s->parameters.decompress_threads = params->decompress_threads;
  1151. }
  1152. if (params->has_cpu_throttle_initial) {
  1153. s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
  1154. }
  1155. if (params->has_cpu_throttle_increment) {
  1156. s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
  1157. }
  1158. if (params->has_tls_creds) {
  1159. g_free(s->parameters.tls_creds);
  1160. assert(params->tls_creds->type == QTYPE_QSTRING);
  1161. s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
  1162. }
  1163. if (params->has_tls_hostname) {
  1164. g_free(s->parameters.tls_hostname);
  1165. assert(params->tls_hostname->type == QTYPE_QSTRING);
  1166. s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
  1167. }
  1168. if (params->has_tls_authz) {
  1169. g_free(s->parameters.tls_authz);
  1170. assert(params->tls_authz->type == QTYPE_QSTRING);
  1171. s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
  1172. }
  1173. if (params->has_max_bandwidth) {
  1174. s->parameters.max_bandwidth = params->max_bandwidth;
  1175. if (s->to_dst_file && !migration_in_postcopy()) {
  1176. qemu_file_set_rate_limit(s->to_dst_file,
  1177. s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
  1178. }
  1179. }
  1180. if (params->has_downtime_limit) {
  1181. s->parameters.downtime_limit = params->downtime_limit;
  1182. }
  1183. if (params->has_x_checkpoint_delay) {
  1184. s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
  1185. if (migration_in_colo_state()) {
  1186. colo_checkpoint_notify(s);
  1187. }
  1188. }
  1189. if (params->has_block_incremental) {
  1190. s->parameters.block_incremental = params->block_incremental;
  1191. }
  1192. if (params->has_multifd_channels) {
  1193. s->parameters.multifd_channels = params->multifd_channels;
  1194. }
  1195. if (params->has_xbzrle_cache_size) {
  1196. s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
  1197. xbzrle_cache_resize(params->xbzrle_cache_size, errp);
  1198. }
  1199. if (params->has_max_postcopy_bandwidth) {
  1200. s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
  1201. if (s->to_dst_file && migration_in_postcopy()) {
  1202. qemu_file_set_rate_limit(s->to_dst_file,
  1203. s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
  1204. }
  1205. }
  1206. if (params->has_max_cpu_throttle) {
  1207. s->parameters.max_cpu_throttle = params->max_cpu_throttle;
  1208. }
  1209. if (params->has_announce_initial) {
  1210. s->parameters.announce_initial = params->announce_initial;
  1211. }
  1212. if (params->has_announce_max) {
  1213. s->parameters.announce_max = params->announce_max;
  1214. }
  1215. if (params->has_announce_rounds) {
  1216. s->parameters.announce_rounds = params->announce_rounds;
  1217. }
  1218. if (params->has_announce_step) {
  1219. s->parameters.announce_step = params->announce_step;
  1220. }
  1221. }
  1222. void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
  1223. {
  1224. MigrationParameters tmp;
  1225. /* TODO Rewrite "" to null instead */
  1226. if (params->has_tls_creds
  1227. && params->tls_creds->type == QTYPE_QNULL) {
  1228. qobject_unref(params->tls_creds->u.n);
  1229. params->tls_creds->type = QTYPE_QSTRING;
  1230. params->tls_creds->u.s = strdup("");
  1231. }
  1232. /* TODO Rewrite "" to null instead */
  1233. if (params->has_tls_hostname
  1234. && params->tls_hostname->type == QTYPE_QNULL) {
  1235. qobject_unref(params->tls_hostname->u.n);
  1236. params->tls_hostname->type = QTYPE_QSTRING;
  1237. params->tls_hostname->u.s = strdup("");
  1238. }
  1239. migrate_params_test_apply(params, &tmp);
  1240. if (!migrate_params_check(&tmp, errp)) {
  1241. /* Invalid parameter */
  1242. return;
  1243. }
  1244. migrate_params_apply(params, errp);
  1245. }
  1246. void qmp_migrate_start_postcopy(Error **errp)
  1247. {
  1248. MigrationState *s = migrate_get_current();
  1249. if (!migrate_postcopy()) {
  1250. error_setg(errp, "Enable postcopy with migrate_set_capability before"
  1251. " the start of migration");
  1252. return;
  1253. }
  1254. if (s->state == MIGRATION_STATUS_NONE) {
  1255. error_setg(errp, "Postcopy must be started after migration has been"
  1256. " started");
  1257. return;
  1258. }
  1259. /*
  1260. * we don't error if migration has finished since that would be racy
  1261. * with issuing this command.
  1262. */
  1263. atomic_set(&s->start_postcopy, true);
  1264. }
  1265. /* shared migration helpers */
  1266. void migrate_set_state(int *state, int old_state, int new_state)
  1267. {
  1268. assert(new_state < MIGRATION_STATUS__MAX);
  1269. if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
  1270. trace_migrate_set_state(MigrationStatus_str(new_state));
  1271. migrate_generate_event(new_state);
  1272. }
  1273. }
  1274. static MigrationCapabilityStatusList *migrate_cap_add(
  1275. MigrationCapabilityStatusList *list,
  1276. MigrationCapability index,
  1277. bool state)
  1278. {
  1279. MigrationCapabilityStatusList *cap;
  1280. cap = g_new0(MigrationCapabilityStatusList, 1);
  1281. cap->value = g_new0(MigrationCapabilityStatus, 1);
  1282. cap->value->capability = index;
  1283. cap->value->state = state;
  1284. cap->next = list;
  1285. return cap;
  1286. }
  1287. void migrate_set_block_enabled(bool value, Error **errp)
  1288. {
  1289. MigrationCapabilityStatusList *cap;
  1290. cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
  1291. qmp_migrate_set_capabilities(cap, errp);
  1292. qapi_free_MigrationCapabilityStatusList(cap);
  1293. }
  1294. static void migrate_set_block_incremental(MigrationState *s, bool value)
  1295. {
  1296. s->parameters.block_incremental = value;
  1297. }
  1298. static void block_cleanup_parameters(MigrationState *s)
  1299. {
  1300. if (s->must_remove_block_options) {
  1301. /* setting to false can never fail */
  1302. migrate_set_block_enabled(false, &error_abort);
  1303. migrate_set_block_incremental(s, false);
  1304. s->must_remove_block_options = false;
  1305. }
  1306. }
  1307. static void migrate_fd_cleanup(MigrationState *s)
  1308. {
  1309. qemu_bh_delete(s->cleanup_bh);
  1310. s->cleanup_bh = NULL;
  1311. qemu_savevm_state_cleanup();
  1312. if (s->to_dst_file) {
  1313. QEMUFile *tmp;
  1314. trace_migrate_fd_cleanup();
  1315. qemu_mutex_unlock_iothread();
  1316. if (s->migration_thread_running) {
  1317. qemu_thread_join(&s->thread);
  1318. s->migration_thread_running = false;
  1319. }
  1320. qemu_mutex_lock_iothread();
  1321. multifd_save_cleanup();
  1322. qemu_mutex_lock(&s->qemu_file_lock);
  1323. tmp = s->to_dst_file;
  1324. s->to_dst_file = NULL;
  1325. qemu_mutex_unlock(&s->qemu_file_lock);
  1326. /*
  1327. * Close the file handle without the lock to make sure the
  1328. * critical section won't block for long.
  1329. */
  1330. qemu_fclose(tmp);
  1331. }
  1332. assert(!migration_is_active(s));
  1333. if (s->state == MIGRATION_STATUS_CANCELLING) {
  1334. migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
  1335. MIGRATION_STATUS_CANCELLED);
  1336. }
  1337. if (s->error) {
  1338. /* It is used on info migrate. We can't free it */
  1339. error_report_err(error_copy(s->error));
  1340. }
  1341. notifier_list_notify(&migration_state_notifiers, s);
  1342. block_cleanup_parameters(s);
  1343. }
  1344. static void migrate_fd_cleanup_schedule(MigrationState *s)
  1345. {
  1346. /*
  1347. * Ref the state for bh, because it may be called when
  1348. * there're already no other refs
  1349. */
  1350. object_ref(OBJECT(s));
  1351. qemu_bh_schedule(s->cleanup_bh);
  1352. }
  1353. static void migrate_fd_cleanup_bh(void *opaque)
  1354. {
  1355. MigrationState *s = opaque;
  1356. migrate_fd_cleanup(s);
  1357. object_unref(OBJECT(s));
  1358. }
  1359. void migrate_set_error(MigrationState *s, const Error *error)
  1360. {
  1361. qemu_mutex_lock(&s->error_mutex);
  1362. if (!s->error) {
  1363. s->error = error_copy(error);
  1364. }
  1365. qemu_mutex_unlock(&s->error_mutex);
  1366. }
  1367. void migrate_fd_error(MigrationState *s, const Error *error)
  1368. {
  1369. trace_migrate_fd_error(error_get_pretty(error));
  1370. assert(s->to_dst_file == NULL);
  1371. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  1372. MIGRATION_STATUS_FAILED);
  1373. migrate_set_error(s, error);
  1374. }
  1375. static void migrate_fd_cancel(MigrationState *s)
  1376. {
  1377. int old_state ;
  1378. QEMUFile *f = migrate_get_current()->to_dst_file;
  1379. trace_migrate_fd_cancel();
  1380. if (s->rp_state.from_dst_file) {
  1381. /* shutdown the rp socket, so causing the rp thread to shutdown */
  1382. qemu_file_shutdown(s->rp_state.from_dst_file);
  1383. }
  1384. do {
  1385. old_state = s->state;
  1386. if (!migration_is_setup_or_active(old_state)) {
  1387. break;
  1388. }
  1389. /* If the migration is paused, kick it out of the pause */
  1390. if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
  1391. qemu_sem_post(&s->pause_sem);
  1392. }
  1393. migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
  1394. } while (s->state != MIGRATION_STATUS_CANCELLING);
  1395. /*
  1396. * If we're unlucky the migration code might be stuck somewhere in a
  1397. * send/write while the network has failed and is waiting to timeout;
  1398. * if we've got shutdown(2) available then we can force it to quit.
  1399. * The outgoing qemu file gets closed in migrate_fd_cleanup that is
  1400. * called in a bh, so there is no race against this cancel.
  1401. */
  1402. if (s->state == MIGRATION_STATUS_CANCELLING && f) {
  1403. qemu_file_shutdown(f);
  1404. }
  1405. if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
  1406. Error *local_err = NULL;
  1407. bdrv_invalidate_cache_all(&local_err);
  1408. if (local_err) {
  1409. error_report_err(local_err);
  1410. } else {
  1411. s->block_inactive = false;
  1412. }
  1413. }
  1414. }
  1415. void add_migration_state_change_notifier(Notifier *notify)
  1416. {
  1417. notifier_list_add(&migration_state_notifiers, notify);
  1418. }
  1419. void remove_migration_state_change_notifier(Notifier *notify)
  1420. {
  1421. notifier_remove(notify);
  1422. }
  1423. bool migration_in_setup(MigrationState *s)
  1424. {
  1425. return s->state == MIGRATION_STATUS_SETUP;
  1426. }
  1427. bool migration_has_finished(MigrationState *s)
  1428. {
  1429. return s->state == MIGRATION_STATUS_COMPLETED;
  1430. }
  1431. bool migration_has_failed(MigrationState *s)
  1432. {
  1433. return (s->state == MIGRATION_STATUS_CANCELLED ||
  1434. s->state == MIGRATION_STATUS_FAILED);
  1435. }
  1436. bool migration_in_postcopy(void)
  1437. {
  1438. MigrationState *s = migrate_get_current();
  1439. switch (s->state) {
  1440. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  1441. case MIGRATION_STATUS_POSTCOPY_PAUSED:
  1442. case MIGRATION_STATUS_POSTCOPY_RECOVER:
  1443. return true;
  1444. default:
  1445. return false;
  1446. }
  1447. }
  1448. bool migration_in_postcopy_after_devices(MigrationState *s)
  1449. {
  1450. return migration_in_postcopy() && s->postcopy_after_devices;
  1451. }
  1452. bool migration_is_idle(void)
  1453. {
  1454. MigrationState *s = current_migration;
  1455. if (!s) {
  1456. return true;
  1457. }
  1458. switch (s->state) {
  1459. case MIGRATION_STATUS_NONE:
  1460. case MIGRATION_STATUS_CANCELLED:
  1461. case MIGRATION_STATUS_COMPLETED:
  1462. case MIGRATION_STATUS_FAILED:
  1463. return true;
  1464. case MIGRATION_STATUS_SETUP:
  1465. case MIGRATION_STATUS_CANCELLING:
  1466. case MIGRATION_STATUS_ACTIVE:
  1467. case MIGRATION_STATUS_POSTCOPY_ACTIVE:
  1468. case MIGRATION_STATUS_COLO:
  1469. case MIGRATION_STATUS_PRE_SWITCHOVER:
  1470. case MIGRATION_STATUS_DEVICE:
  1471. case MIGRATION_STATUS_WAIT_UNPLUG:
  1472. return false;
  1473. case MIGRATION_STATUS__MAX:
  1474. g_assert_not_reached();
  1475. }
  1476. return false;
  1477. }
  1478. bool migration_is_active(MigrationState *s)
  1479. {
  1480. return (s->state == MIGRATION_STATUS_ACTIVE ||
  1481. s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
  1482. }
  1483. void migrate_init(MigrationState *s)
  1484. {
  1485. /*
  1486. * Reinitialise all migration state, except
  1487. * parameters/capabilities that the user set, and
  1488. * locks.
  1489. */
  1490. s->cleanup_bh = 0;
  1491. s->to_dst_file = NULL;
  1492. s->state = MIGRATION_STATUS_NONE;
  1493. s->rp_state.from_dst_file = NULL;
  1494. s->rp_state.error = false;
  1495. s->mbps = 0.0;
  1496. s->pages_per_second = 0.0;
  1497. s->downtime = 0;
  1498. s->expected_downtime = 0;
  1499. s->setup_time = 0;
  1500. s->start_postcopy = false;
  1501. s->postcopy_after_devices = false;
  1502. s->migration_thread_running = false;
  1503. error_free(s->error);
  1504. s->error = NULL;
  1505. migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
  1506. s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  1507. s->total_time = 0;
  1508. s->vm_was_running = false;
  1509. s->iteration_initial_bytes = 0;
  1510. s->threshold_size = 0;
  1511. }
  1512. static GSList *migration_blockers;
  1513. int migrate_add_blocker(Error *reason, Error **errp)
  1514. {
  1515. if (only_migratable) {
  1516. error_propagate_prepend(errp, error_copy(reason),
  1517. "disallowing migration blocker "
  1518. "(--only-migratable) for: ");
  1519. return -EACCES;
  1520. }
  1521. if (migration_is_idle()) {
  1522. migration_blockers = g_slist_prepend(migration_blockers, reason);
  1523. return 0;
  1524. }
  1525. error_propagate_prepend(errp, error_copy(reason),
  1526. "disallowing migration blocker "
  1527. "(migration in progress) for: ");
  1528. return -EBUSY;
  1529. }
  1530. void migrate_del_blocker(Error *reason)
  1531. {
  1532. migration_blockers = g_slist_remove(migration_blockers, reason);
  1533. }
  1534. void qmp_migrate_incoming(const char *uri, Error **errp)
  1535. {
  1536. Error *local_err = NULL;
  1537. static bool once = true;
  1538. if (!deferred_incoming) {
  1539. error_setg(errp, "For use with '-incoming defer'");
  1540. return;
  1541. }
  1542. if (!once) {
  1543. error_setg(errp, "The incoming migration has already been started");
  1544. }
  1545. qemu_start_incoming_migration(uri, &local_err);
  1546. if (local_err) {
  1547. error_propagate(errp, local_err);
  1548. return;
  1549. }
  1550. once = false;
  1551. }
  1552. void qmp_migrate_recover(const char *uri, Error **errp)
  1553. {
  1554. MigrationIncomingState *mis = migration_incoming_get_current();
  1555. if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
  1556. error_setg(errp, "Migrate recover can only be run "
  1557. "when postcopy is paused.");
  1558. return;
  1559. }
  1560. if (atomic_cmpxchg(&mis->postcopy_recover_triggered,
  1561. false, true) == true) {
  1562. error_setg(errp, "Migrate recovery is triggered already");
  1563. return;
  1564. }
  1565. /*
  1566. * Note that this call will never start a real migration; it will
  1567. * only re-setup the migration stream and poke existing migration
  1568. * to continue using that newly established channel.
  1569. */
  1570. qemu_start_incoming_migration(uri, errp);
  1571. }
  1572. void qmp_migrate_pause(Error **errp)
  1573. {
  1574. MigrationState *ms = migrate_get_current();
  1575. MigrationIncomingState *mis = migration_incoming_get_current();
  1576. int ret;
  1577. if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  1578. /* Source side, during postcopy */
  1579. qemu_mutex_lock(&ms->qemu_file_lock);
  1580. ret = qemu_file_shutdown(ms->to_dst_file);
  1581. qemu_mutex_unlock(&ms->qemu_file_lock);
  1582. if (ret) {
  1583. error_setg(errp, "Failed to pause source migration");
  1584. }
  1585. return;
  1586. }
  1587. if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  1588. ret = qemu_file_shutdown(mis->from_src_file);
  1589. if (ret) {
  1590. error_setg(errp, "Failed to pause destination migration");
  1591. }
  1592. return;
  1593. }
  1594. error_setg(errp, "migrate-pause is currently only supported "
  1595. "during postcopy-active state");
  1596. }
  1597. bool migration_is_blocked(Error **errp)
  1598. {
  1599. if (qemu_savevm_state_blocked(errp)) {
  1600. return true;
  1601. }
  1602. if (migration_blockers) {
  1603. error_propagate(errp, error_copy(migration_blockers->data));
  1604. return true;
  1605. }
  1606. return false;
  1607. }
  1608. /* Returns true if continue to migrate, or false if error detected */
  1609. static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
  1610. bool resume, Error **errp)
  1611. {
  1612. Error *local_err = NULL;
  1613. if (resume) {
  1614. if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
  1615. error_setg(errp, "Cannot resume if there is no "
  1616. "paused migration");
  1617. return false;
  1618. }
  1619. /*
  1620. * Postcopy recovery won't work well with release-ram
  1621. * capability since release-ram will drop the page buffer as
  1622. * long as the page is put into the send buffer. So if there
  1623. * is a network failure happened, any page buffers that have
  1624. * not yet reached the destination VM but have already been
  1625. * sent from the source VM will be lost forever. Let's refuse
  1626. * the client from resuming such a postcopy migration.
  1627. * Luckily release-ram was designed to only be used when src
  1628. * and destination VMs are on the same host, so it should be
  1629. * fine.
  1630. */
  1631. if (migrate_release_ram()) {
  1632. error_setg(errp, "Postcopy recovery cannot work "
  1633. "when release-ram capability is set");
  1634. return false;
  1635. }
  1636. /* This is a resume, skip init status */
  1637. return true;
  1638. }
  1639. if (migration_is_setup_or_active(s->state) ||
  1640. s->state == MIGRATION_STATUS_CANCELLING ||
  1641. s->state == MIGRATION_STATUS_COLO) {
  1642. error_setg(errp, QERR_MIGRATION_ACTIVE);
  1643. return false;
  1644. }
  1645. if (runstate_check(RUN_STATE_INMIGRATE)) {
  1646. error_setg(errp, "Guest is waiting for an incoming migration");
  1647. return false;
  1648. }
  1649. if (migration_is_blocked(errp)) {
  1650. return false;
  1651. }
  1652. if (blk || blk_inc) {
  1653. if (migrate_use_block() || migrate_use_block_incremental()) {
  1654. error_setg(errp, "Command options are incompatible with "
  1655. "current migration capabilities");
  1656. return false;
  1657. }
  1658. migrate_set_block_enabled(true, &local_err);
  1659. if (local_err) {
  1660. error_propagate(errp, local_err);
  1661. return false;
  1662. }
  1663. s->must_remove_block_options = true;
  1664. }
  1665. if (blk_inc) {
  1666. migrate_set_block_incremental(s, true);
  1667. }
  1668. migrate_init(s);
  1669. /*
  1670. * set ram_counters memory to zero for a
  1671. * new migration
  1672. */
  1673. memset(&ram_counters, 0, sizeof(ram_counters));
  1674. return true;
  1675. }
  1676. void qmp_migrate(const char *uri, bool has_blk, bool blk,
  1677. bool has_inc, bool inc, bool has_detach, bool detach,
  1678. bool has_resume, bool resume, Error **errp)
  1679. {
  1680. Error *local_err = NULL;
  1681. MigrationState *s = migrate_get_current();
  1682. const char *p;
  1683. if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
  1684. has_resume && resume, errp)) {
  1685. /* Error detected, put into errp */
  1686. return;
  1687. }
  1688. if (strstart(uri, "tcp:", &p)) {
  1689. tcp_start_outgoing_migration(s, p, &local_err);
  1690. #ifdef CONFIG_RDMA
  1691. } else if (strstart(uri, "rdma:", &p)) {
  1692. rdma_start_outgoing_migration(s, p, &local_err);
  1693. #endif
  1694. } else if (strstart(uri, "exec:", &p)) {
  1695. exec_start_outgoing_migration(s, p, &local_err);
  1696. } else if (strstart(uri, "unix:", &p)) {
  1697. unix_start_outgoing_migration(s, p, &local_err);
  1698. } else if (strstart(uri, "fd:", &p)) {
  1699. fd_start_outgoing_migration(s, p, &local_err);
  1700. } else {
  1701. error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
  1702. "a valid migration protocol");
  1703. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  1704. MIGRATION_STATUS_FAILED);
  1705. block_cleanup_parameters(s);
  1706. return;
  1707. }
  1708. if (local_err) {
  1709. migrate_fd_error(s, local_err);
  1710. error_propagate(errp, local_err);
  1711. return;
  1712. }
  1713. }
  1714. void qmp_migrate_cancel(Error **errp)
  1715. {
  1716. migrate_fd_cancel(migrate_get_current());
  1717. }
  1718. void qmp_migrate_continue(MigrationStatus state, Error **errp)
  1719. {
  1720. MigrationState *s = migrate_get_current();
  1721. if (s->state != state) {
  1722. error_setg(errp, "Migration not in expected state: %s",
  1723. MigrationStatus_str(s->state));
  1724. return;
  1725. }
  1726. qemu_sem_post(&s->pause_sem);
  1727. }
  1728. void qmp_migrate_set_cache_size(int64_t value, Error **errp)
  1729. {
  1730. MigrateSetParameters p = {
  1731. .has_xbzrle_cache_size = true,
  1732. .xbzrle_cache_size = value,
  1733. };
  1734. qmp_migrate_set_parameters(&p, errp);
  1735. }
  1736. int64_t qmp_query_migrate_cache_size(Error **errp)
  1737. {
  1738. return migrate_xbzrle_cache_size();
  1739. }
  1740. void qmp_migrate_set_speed(int64_t value, Error **errp)
  1741. {
  1742. MigrateSetParameters p = {
  1743. .has_max_bandwidth = true,
  1744. .max_bandwidth = value,
  1745. };
  1746. qmp_migrate_set_parameters(&p, errp);
  1747. }
  1748. void qmp_migrate_set_downtime(double value, Error **errp)
  1749. {
  1750. if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
  1751. error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
  1752. "the range of 0 to %d seconds",
  1753. MAX_MIGRATE_DOWNTIME_SECONDS);
  1754. return;
  1755. }
  1756. value *= 1000; /* Convert to milliseconds */
  1757. value = MAX(0, MIN(INT64_MAX, value));
  1758. MigrateSetParameters p = {
  1759. .has_downtime_limit = true,
  1760. .downtime_limit = value,
  1761. };
  1762. qmp_migrate_set_parameters(&p, errp);
  1763. }
  1764. bool migrate_release_ram(void)
  1765. {
  1766. MigrationState *s;
  1767. s = migrate_get_current();
  1768. return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
  1769. }
  1770. bool migrate_postcopy_ram(void)
  1771. {
  1772. MigrationState *s;
  1773. s = migrate_get_current();
  1774. return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
  1775. }
  1776. bool migrate_postcopy(void)
  1777. {
  1778. return migrate_postcopy_ram() || migrate_dirty_bitmaps();
  1779. }
  1780. bool migrate_auto_converge(void)
  1781. {
  1782. MigrationState *s;
  1783. s = migrate_get_current();
  1784. return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
  1785. }
  1786. bool migrate_zero_blocks(void)
  1787. {
  1788. MigrationState *s;
  1789. s = migrate_get_current();
  1790. return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
  1791. }
  1792. bool migrate_postcopy_blocktime(void)
  1793. {
  1794. MigrationState *s;
  1795. s = migrate_get_current();
  1796. return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
  1797. }
  1798. bool migrate_use_compression(void)
  1799. {
  1800. MigrationState *s;
  1801. s = migrate_get_current();
  1802. return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
  1803. }
  1804. int migrate_compress_level(void)
  1805. {
  1806. MigrationState *s;
  1807. s = migrate_get_current();
  1808. return s->parameters.compress_level;
  1809. }
  1810. int migrate_compress_threads(void)
  1811. {
  1812. MigrationState *s;
  1813. s = migrate_get_current();
  1814. return s->parameters.compress_threads;
  1815. }
  1816. int migrate_compress_wait_thread(void)
  1817. {
  1818. MigrationState *s;
  1819. s = migrate_get_current();
  1820. return s->parameters.compress_wait_thread;
  1821. }
  1822. int migrate_decompress_threads(void)
  1823. {
  1824. MigrationState *s;
  1825. s = migrate_get_current();
  1826. return s->parameters.decompress_threads;
  1827. }
  1828. bool migrate_dirty_bitmaps(void)
  1829. {
  1830. MigrationState *s;
  1831. s = migrate_get_current();
  1832. return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
  1833. }
  1834. bool migrate_ignore_shared(void)
  1835. {
  1836. MigrationState *s;
  1837. s = migrate_get_current();
  1838. return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
  1839. }
  1840. bool migrate_validate_uuid(void)
  1841. {
  1842. MigrationState *s;
  1843. s = migrate_get_current();
  1844. return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
  1845. }
  1846. bool migrate_use_events(void)
  1847. {
  1848. MigrationState *s;
  1849. s = migrate_get_current();
  1850. return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
  1851. }
  1852. bool migrate_use_multifd(void)
  1853. {
  1854. MigrationState *s;
  1855. s = migrate_get_current();
  1856. return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
  1857. }
  1858. bool migrate_pause_before_switchover(void)
  1859. {
  1860. MigrationState *s;
  1861. s = migrate_get_current();
  1862. return s->enabled_capabilities[
  1863. MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
  1864. }
  1865. int migrate_multifd_channels(void)
  1866. {
  1867. MigrationState *s;
  1868. s = migrate_get_current();
  1869. return s->parameters.multifd_channels;
  1870. }
  1871. int migrate_use_xbzrle(void)
  1872. {
  1873. MigrationState *s;
  1874. s = migrate_get_current();
  1875. return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
  1876. }
  1877. int64_t migrate_xbzrle_cache_size(void)
  1878. {
  1879. MigrationState *s;
  1880. s = migrate_get_current();
  1881. return s->parameters.xbzrle_cache_size;
  1882. }
  1883. static int64_t migrate_max_postcopy_bandwidth(void)
  1884. {
  1885. MigrationState *s;
  1886. s = migrate_get_current();
  1887. return s->parameters.max_postcopy_bandwidth;
  1888. }
  1889. bool migrate_use_block(void)
  1890. {
  1891. MigrationState *s;
  1892. s = migrate_get_current();
  1893. return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
  1894. }
  1895. bool migrate_use_return_path(void)
  1896. {
  1897. MigrationState *s;
  1898. s = migrate_get_current();
  1899. return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
  1900. }
  1901. bool migrate_use_block_incremental(void)
  1902. {
  1903. MigrationState *s;
  1904. s = migrate_get_current();
  1905. return s->parameters.block_incremental;
  1906. }
  1907. /* migration thread support */
  1908. /*
  1909. * Something bad happened to the RP stream, mark an error
  1910. * The caller shall print or trace something to indicate why
  1911. */
  1912. static void mark_source_rp_bad(MigrationState *s)
  1913. {
  1914. s->rp_state.error = true;
  1915. }
  1916. static struct rp_cmd_args {
  1917. ssize_t len; /* -1 = variable */
  1918. const char *name;
  1919. } rp_cmd_args[] = {
  1920. [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" },
  1921. [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" },
  1922. [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" },
  1923. [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" },
  1924. [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
  1925. [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
  1926. [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" },
  1927. [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
  1928. };
  1929. /*
  1930. * Process a request for pages received on the return path,
  1931. * We're allowed to send more than requested (e.g. to round to our page size)
  1932. * and we don't need to send pages that have already been sent.
  1933. */
  1934. static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
  1935. ram_addr_t start, size_t len)
  1936. {
  1937. long our_host_ps = qemu_real_host_page_size;
  1938. trace_migrate_handle_rp_req_pages(rbname, start, len);
  1939. /*
  1940. * Since we currently insist on matching page sizes, just sanity check
  1941. * we're being asked for whole host pages.
  1942. */
  1943. if (start & (our_host_ps-1) ||
  1944. (len & (our_host_ps-1))) {
  1945. error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
  1946. " len: %zd", __func__, start, len);
  1947. mark_source_rp_bad(ms);
  1948. return;
  1949. }
  1950. if (ram_save_queue_pages(rbname, start, len)) {
  1951. mark_source_rp_bad(ms);
  1952. }
  1953. }
  1954. /* Return true to retry, false to quit */
  1955. static bool postcopy_pause_return_path_thread(MigrationState *s)
  1956. {
  1957. trace_postcopy_pause_return_path();
  1958. qemu_sem_wait(&s->postcopy_pause_rp_sem);
  1959. trace_postcopy_pause_return_path_continued();
  1960. return true;
  1961. }
  1962. static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
  1963. {
  1964. RAMBlock *block = qemu_ram_block_by_name(block_name);
  1965. if (!block) {
  1966. error_report("%s: invalid block name '%s'", __func__, block_name);
  1967. return -EINVAL;
  1968. }
  1969. /* Fetch the received bitmap and refresh the dirty bitmap */
  1970. return ram_dirty_bitmap_reload(s, block);
  1971. }
  1972. static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
  1973. {
  1974. trace_source_return_path_thread_resume_ack(value);
  1975. if (value != MIGRATION_RESUME_ACK_VALUE) {
  1976. error_report("%s: illegal resume_ack value %"PRIu32,
  1977. __func__, value);
  1978. return -1;
  1979. }
  1980. /* Now both sides are active. */
  1981. migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
  1982. MIGRATION_STATUS_POSTCOPY_ACTIVE);
  1983. /* Notify send thread that time to continue send pages */
  1984. qemu_sem_post(&s->rp_state.rp_sem);
  1985. return 0;
  1986. }
  1987. /*
  1988. * Handles messages sent on the return path towards the source VM
  1989. *
  1990. */
  1991. static void *source_return_path_thread(void *opaque)
  1992. {
  1993. MigrationState *ms = opaque;
  1994. QEMUFile *rp = ms->rp_state.from_dst_file;
  1995. uint16_t header_len, header_type;
  1996. uint8_t buf[512];
  1997. uint32_t tmp32, sibling_error;
  1998. ram_addr_t start = 0; /* =0 to silence warning */
  1999. size_t len = 0, expected_len;
  2000. int res;
  2001. trace_source_return_path_thread_entry();
  2002. rcu_register_thread();
  2003. retry:
  2004. while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
  2005. migration_is_setup_or_active(ms->state)) {
  2006. trace_source_return_path_thread_loop_top();
  2007. header_type = qemu_get_be16(rp);
  2008. header_len = qemu_get_be16(rp);
  2009. if (qemu_file_get_error(rp)) {
  2010. mark_source_rp_bad(ms);
  2011. goto out;
  2012. }
  2013. if (header_type >= MIG_RP_MSG_MAX ||
  2014. header_type == MIG_RP_MSG_INVALID) {
  2015. error_report("RP: Received invalid message 0x%04x length 0x%04x",
  2016. header_type, header_len);
  2017. mark_source_rp_bad(ms);
  2018. goto out;
  2019. }
  2020. if ((rp_cmd_args[header_type].len != -1 &&
  2021. header_len != rp_cmd_args[header_type].len) ||
  2022. header_len > sizeof(buf)) {
  2023. error_report("RP: Received '%s' message (0x%04x) with"
  2024. "incorrect length %d expecting %zu",
  2025. rp_cmd_args[header_type].name, header_type, header_len,
  2026. (size_t)rp_cmd_args[header_type].len);
  2027. mark_source_rp_bad(ms);
  2028. goto out;
  2029. }
  2030. /* We know we've got a valid header by this point */
  2031. res = qemu_get_buffer(rp, buf, header_len);
  2032. if (res != header_len) {
  2033. error_report("RP: Failed reading data for message 0x%04x"
  2034. " read %d expected %d",
  2035. header_type, res, header_len);
  2036. mark_source_rp_bad(ms);
  2037. goto out;
  2038. }
  2039. /* OK, we have the message and the data */
  2040. switch (header_type) {
  2041. case MIG_RP_MSG_SHUT:
  2042. sibling_error = ldl_be_p(buf);
  2043. trace_source_return_path_thread_shut(sibling_error);
  2044. if (sibling_error) {
  2045. error_report("RP: Sibling indicated error %d", sibling_error);
  2046. mark_source_rp_bad(ms);
  2047. }
  2048. /*
  2049. * We'll let the main thread deal with closing the RP
  2050. * we could do a shutdown(2) on it, but we're the only user
  2051. * anyway, so there's nothing gained.
  2052. */
  2053. goto out;
  2054. case MIG_RP_MSG_PONG:
  2055. tmp32 = ldl_be_p(buf);
  2056. trace_source_return_path_thread_pong(tmp32);
  2057. break;
  2058. case MIG_RP_MSG_REQ_PAGES:
  2059. start = ldq_be_p(buf);
  2060. len = ldl_be_p(buf + 8);
  2061. migrate_handle_rp_req_pages(ms, NULL, start, len);
  2062. break;
  2063. case MIG_RP_MSG_REQ_PAGES_ID:
  2064. expected_len = 12 + 1; /* header + termination */
  2065. if (header_len >= expected_len) {
  2066. start = ldq_be_p(buf);
  2067. len = ldl_be_p(buf + 8);
  2068. /* Now we expect an idstr */
  2069. tmp32 = buf[12]; /* Length of the following idstr */
  2070. buf[13 + tmp32] = '\0';
  2071. expected_len += tmp32;
  2072. }
  2073. if (header_len != expected_len) {
  2074. error_report("RP: Req_Page_id with length %d expecting %zd",
  2075. header_len, expected_len);
  2076. mark_source_rp_bad(ms);
  2077. goto out;
  2078. }
  2079. migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
  2080. break;
  2081. case MIG_RP_MSG_RECV_BITMAP:
  2082. if (header_len < 1) {
  2083. error_report("%s: missing block name", __func__);
  2084. mark_source_rp_bad(ms);
  2085. goto out;
  2086. }
  2087. /* Format: len (1B) + idstr (<255B). This ends the idstr. */
  2088. buf[buf[0] + 1] = '\0';
  2089. if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
  2090. mark_source_rp_bad(ms);
  2091. goto out;
  2092. }
  2093. break;
  2094. case MIG_RP_MSG_RESUME_ACK:
  2095. tmp32 = ldl_be_p(buf);
  2096. if (migrate_handle_rp_resume_ack(ms, tmp32)) {
  2097. mark_source_rp_bad(ms);
  2098. goto out;
  2099. }
  2100. break;
  2101. default:
  2102. break;
  2103. }
  2104. }
  2105. out:
  2106. res = qemu_file_get_error(rp);
  2107. if (res) {
  2108. if (res == -EIO && migration_in_postcopy()) {
  2109. /*
  2110. * Maybe there is something we can do: it looks like a
  2111. * network down issue, and we pause for a recovery.
  2112. */
  2113. if (postcopy_pause_return_path_thread(ms)) {
  2114. /* Reload rp, reset the rest */
  2115. if (rp != ms->rp_state.from_dst_file) {
  2116. qemu_fclose(rp);
  2117. rp = ms->rp_state.from_dst_file;
  2118. }
  2119. ms->rp_state.error = false;
  2120. goto retry;
  2121. }
  2122. }
  2123. trace_source_return_path_thread_bad_end();
  2124. mark_source_rp_bad(ms);
  2125. }
  2126. trace_source_return_path_thread_end();
  2127. ms->rp_state.from_dst_file = NULL;
  2128. qemu_fclose(rp);
  2129. rcu_unregister_thread();
  2130. return NULL;
  2131. }
  2132. static int open_return_path_on_source(MigrationState *ms,
  2133. bool create_thread)
  2134. {
  2135. ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
  2136. if (!ms->rp_state.from_dst_file) {
  2137. return -1;
  2138. }
  2139. trace_open_return_path_on_source();
  2140. if (!create_thread) {
  2141. /* We're done */
  2142. return 0;
  2143. }
  2144. qemu_thread_create(&ms->rp_state.rp_thread, "return path",
  2145. source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
  2146. trace_open_return_path_on_source_continue();
  2147. return 0;
  2148. }
  2149. /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
  2150. static int await_return_path_close_on_source(MigrationState *ms)
  2151. {
  2152. /*
  2153. * If this is a normal exit then the destination will send a SHUT and the
  2154. * rp_thread will exit, however if there's an error we need to cause
  2155. * it to exit.
  2156. */
  2157. if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
  2158. /*
  2159. * shutdown(2), if we have it, will cause it to unblock if it's stuck
  2160. * waiting for the destination.
  2161. */
  2162. qemu_file_shutdown(ms->rp_state.from_dst_file);
  2163. mark_source_rp_bad(ms);
  2164. }
  2165. trace_await_return_path_close_on_source_joining();
  2166. qemu_thread_join(&ms->rp_state.rp_thread);
  2167. trace_await_return_path_close_on_source_close();
  2168. return ms->rp_state.error;
  2169. }
  2170. /*
  2171. * Switch from normal iteration to postcopy
  2172. * Returns non-0 on error
  2173. */
  2174. static int postcopy_start(MigrationState *ms)
  2175. {
  2176. int ret;
  2177. QIOChannelBuffer *bioc;
  2178. QEMUFile *fb;
  2179. int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2180. int64_t bandwidth = migrate_max_postcopy_bandwidth();
  2181. bool restart_block = false;
  2182. int cur_state = MIGRATION_STATUS_ACTIVE;
  2183. if (!migrate_pause_before_switchover()) {
  2184. migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
  2185. MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2186. }
  2187. trace_postcopy_start();
  2188. qemu_mutex_lock_iothread();
  2189. trace_postcopy_start_set_run();
  2190. qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
  2191. global_state_store();
  2192. ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
  2193. if (ret < 0) {
  2194. goto fail;
  2195. }
  2196. ret = migration_maybe_pause(ms, &cur_state,
  2197. MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2198. if (ret < 0) {
  2199. goto fail;
  2200. }
  2201. ret = bdrv_inactivate_all();
  2202. if (ret < 0) {
  2203. goto fail;
  2204. }
  2205. restart_block = true;
  2206. /*
  2207. * Cause any non-postcopiable, but iterative devices to
  2208. * send out their final data.
  2209. */
  2210. qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
  2211. /*
  2212. * in Finish migrate and with the io-lock held everything should
  2213. * be quiet, but we've potentially still got dirty pages and we
  2214. * need to tell the destination to throw any pages it's already received
  2215. * that are dirty
  2216. */
  2217. if (migrate_postcopy_ram()) {
  2218. if (ram_postcopy_send_discard_bitmap(ms)) {
  2219. error_report("postcopy send discard bitmap failed");
  2220. goto fail;
  2221. }
  2222. }
  2223. /*
  2224. * send rest of state - note things that are doing postcopy
  2225. * will notice we're in POSTCOPY_ACTIVE and not actually
  2226. * wrap their state up here
  2227. */
  2228. /* 0 max-postcopy-bandwidth means unlimited */
  2229. if (!bandwidth) {
  2230. qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
  2231. } else {
  2232. qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
  2233. }
  2234. if (migrate_postcopy_ram()) {
  2235. /* Ping just for debugging, helps line traces up */
  2236. qemu_savevm_send_ping(ms->to_dst_file, 2);
  2237. }
  2238. /*
  2239. * While loading the device state we may trigger page transfer
  2240. * requests and the fd must be free to process those, and thus
  2241. * the destination must read the whole device state off the fd before
  2242. * it starts processing it. Unfortunately the ad-hoc migration format
  2243. * doesn't allow the destination to know the size to read without fully
  2244. * parsing it through each devices load-state code (especially the open
  2245. * coded devices that use get/put).
  2246. * So we wrap the device state up in a package with a length at the start;
  2247. * to do this we use a qemu_buf to hold the whole of the device state.
  2248. */
  2249. bioc = qio_channel_buffer_new(4096);
  2250. qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
  2251. fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
  2252. object_unref(OBJECT(bioc));
  2253. /*
  2254. * Make sure the receiver can get incoming pages before we send the rest
  2255. * of the state
  2256. */
  2257. qemu_savevm_send_postcopy_listen(fb);
  2258. qemu_savevm_state_complete_precopy(fb, false, false);
  2259. if (migrate_postcopy_ram()) {
  2260. qemu_savevm_send_ping(fb, 3);
  2261. }
  2262. qemu_savevm_send_postcopy_run(fb);
  2263. /* <><> end of stuff going into the package */
  2264. /* Last point of recovery; as soon as we send the package the destination
  2265. * can open devices and potentially start running.
  2266. * Lets just check again we've not got any errors.
  2267. */
  2268. ret = qemu_file_get_error(ms->to_dst_file);
  2269. if (ret) {
  2270. error_report("postcopy_start: Migration stream errored (pre package)");
  2271. goto fail_closefb;
  2272. }
  2273. restart_block = false;
  2274. /* Now send that blob */
  2275. if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
  2276. goto fail_closefb;
  2277. }
  2278. qemu_fclose(fb);
  2279. /* Send a notify to give a chance for anything that needs to happen
  2280. * at the transition to postcopy and after the device state; in particular
  2281. * spice needs to trigger a transition now
  2282. */
  2283. ms->postcopy_after_devices = true;
  2284. notifier_list_notify(&migration_state_notifiers, ms);
  2285. ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
  2286. qemu_mutex_unlock_iothread();
  2287. if (migrate_postcopy_ram()) {
  2288. /*
  2289. * Although this ping is just for debug, it could potentially be
  2290. * used for getting a better measurement of downtime at the source.
  2291. */
  2292. qemu_savevm_send_ping(ms->to_dst_file, 4);
  2293. }
  2294. if (migrate_release_ram()) {
  2295. ram_postcopy_migrated_memory_release(ms);
  2296. }
  2297. ret = qemu_file_get_error(ms->to_dst_file);
  2298. if (ret) {
  2299. error_report("postcopy_start: Migration stream errored");
  2300. migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
  2301. MIGRATION_STATUS_FAILED);
  2302. }
  2303. return ret;
  2304. fail_closefb:
  2305. qemu_fclose(fb);
  2306. fail:
  2307. migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
  2308. MIGRATION_STATUS_FAILED);
  2309. if (restart_block) {
  2310. /* A failure happened early enough that we know the destination hasn't
  2311. * accessed block devices, so we're safe to recover.
  2312. */
  2313. Error *local_err = NULL;
  2314. bdrv_invalidate_cache_all(&local_err);
  2315. if (local_err) {
  2316. error_report_err(local_err);
  2317. }
  2318. }
  2319. qemu_mutex_unlock_iothread();
  2320. return -1;
  2321. }
  2322. /**
  2323. * migration_maybe_pause: Pause if required to by
  2324. * migrate_pause_before_switchover called with the iothread locked
  2325. * Returns: 0 on success
  2326. */
  2327. static int migration_maybe_pause(MigrationState *s,
  2328. int *current_active_state,
  2329. int new_state)
  2330. {
  2331. if (!migrate_pause_before_switchover()) {
  2332. return 0;
  2333. }
  2334. /* Since leaving this state is not atomic with posting the semaphore
  2335. * it's possible that someone could have issued multiple migrate_continue
  2336. * and the semaphore is incorrectly positive at this point;
  2337. * the docs say it's undefined to reinit a semaphore that's already
  2338. * init'd, so use timedwait to eat up any existing posts.
  2339. */
  2340. while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
  2341. /* This block intentionally left blank */
  2342. }
  2343. qemu_mutex_unlock_iothread();
  2344. migrate_set_state(&s->state, *current_active_state,
  2345. MIGRATION_STATUS_PRE_SWITCHOVER);
  2346. qemu_sem_wait(&s->pause_sem);
  2347. migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
  2348. new_state);
  2349. *current_active_state = new_state;
  2350. qemu_mutex_lock_iothread();
  2351. return s->state == new_state ? 0 : -EINVAL;
  2352. }
  2353. /**
  2354. * migration_completion: Used by migration_thread when there's not much left.
  2355. * The caller 'breaks' the loop when this returns.
  2356. *
  2357. * @s: Current migration state
  2358. */
  2359. static void migration_completion(MigrationState *s)
  2360. {
  2361. int ret;
  2362. int current_active_state = s->state;
  2363. if (s->state == MIGRATION_STATUS_ACTIVE) {
  2364. qemu_mutex_lock_iothread();
  2365. s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2366. qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
  2367. s->vm_was_running = runstate_is_running();
  2368. ret = global_state_store();
  2369. if (!ret) {
  2370. bool inactivate = !migrate_colo_enabled();
  2371. ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
  2372. if (ret >= 0) {
  2373. ret = migration_maybe_pause(s, &current_active_state,
  2374. MIGRATION_STATUS_DEVICE);
  2375. }
  2376. if (ret >= 0) {
  2377. qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
  2378. ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
  2379. inactivate);
  2380. }
  2381. if (inactivate && ret >= 0) {
  2382. s->block_inactive = true;
  2383. }
  2384. }
  2385. qemu_mutex_unlock_iothread();
  2386. if (ret < 0) {
  2387. goto fail;
  2388. }
  2389. } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  2390. trace_migration_completion_postcopy_end();
  2391. qemu_savevm_state_complete_postcopy(s->to_dst_file);
  2392. trace_migration_completion_postcopy_end_after_complete();
  2393. }
  2394. /*
  2395. * If rp was opened we must clean up the thread before
  2396. * cleaning everything else up (since if there are no failures
  2397. * it will wait for the destination to send it's status in
  2398. * a SHUT command).
  2399. */
  2400. if (s->rp_state.from_dst_file) {
  2401. int rp_error;
  2402. trace_migration_return_path_end_before();
  2403. rp_error = await_return_path_close_on_source(s);
  2404. trace_migration_return_path_end_after(rp_error);
  2405. if (rp_error) {
  2406. goto fail_invalidate;
  2407. }
  2408. }
  2409. if (qemu_file_get_error(s->to_dst_file)) {
  2410. trace_migration_completion_file_err();
  2411. goto fail_invalidate;
  2412. }
  2413. if (!migrate_colo_enabled()) {
  2414. migrate_set_state(&s->state, current_active_state,
  2415. MIGRATION_STATUS_COMPLETED);
  2416. }
  2417. return;
  2418. fail_invalidate:
  2419. /* If not doing postcopy, vm_start() will be called: let's regain
  2420. * control on images.
  2421. */
  2422. if (s->state == MIGRATION_STATUS_ACTIVE ||
  2423. s->state == MIGRATION_STATUS_DEVICE) {
  2424. Error *local_err = NULL;
  2425. qemu_mutex_lock_iothread();
  2426. bdrv_invalidate_cache_all(&local_err);
  2427. if (local_err) {
  2428. error_report_err(local_err);
  2429. } else {
  2430. s->block_inactive = false;
  2431. }
  2432. qemu_mutex_unlock_iothread();
  2433. }
  2434. fail:
  2435. migrate_set_state(&s->state, current_active_state,
  2436. MIGRATION_STATUS_FAILED);
  2437. }
  2438. bool migrate_colo_enabled(void)
  2439. {
  2440. MigrationState *s = migrate_get_current();
  2441. return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
  2442. }
  /* Outcome of an error check performed by the migration thread. */
  typedef enum MigThrError {
      MIG_THR_ERR_NONE = 0,       /* No error detected */
      MIG_THR_ERR_RECOVERED = 1,  /* Error detected, but resumed successfully */
      MIG_THR_ERR_FATAL = 2,      /* Fatal error detected, thread must exit */
  } MigThrError;
  2451. static int postcopy_resume_handshake(MigrationState *s)
  2452. {
  2453. qemu_savevm_send_postcopy_resume(s->to_dst_file);
  2454. while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
  2455. qemu_sem_wait(&s->rp_state.rp_sem);
  2456. }
  2457. if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  2458. return 0;
  2459. }
  2460. return -1;
  2461. }
  2462. /* Return zero if success, or <0 for error */
  2463. static int postcopy_do_resume(MigrationState *s)
  2464. {
  2465. int ret;
  2466. /*
  2467. * Call all the resume_prepare() hooks, so that modules can be
  2468. * ready for the migration resume.
  2469. */
  2470. ret = qemu_savevm_state_resume_prepare(s);
  2471. if (ret) {
  2472. error_report("%s: resume_prepare() failure detected: %d",
  2473. __func__, ret);
  2474. return ret;
  2475. }
  2476. /*
  2477. * Last handshake with destination on the resume (destination will
  2478. * switch to postcopy-active afterwards)
  2479. */
  2480. ret = postcopy_resume_handshake(s);
  2481. if (ret) {
  2482. error_report("%s: handshake failed: %d", __func__, ret);
  2483. return ret;
  2484. }
  2485. return 0;
  2486. }
  2487. /*
  2488. * We don't return until we are in a safe state to continue current
  2489. * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or
  2490. * MIG_THR_ERR_FATAL if unrecovery failure happened.
  2491. */
  2492. static MigThrError postcopy_pause(MigrationState *s)
  2493. {
  2494. assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2495. while (true) {
  2496. QEMUFile *file;
  2497. migrate_set_state(&s->state, s->state,
  2498. MIGRATION_STATUS_POSTCOPY_PAUSED);
  2499. /* Current channel is possibly broken. Release it. */
  2500. assert(s->to_dst_file);
  2501. qemu_mutex_lock(&s->qemu_file_lock);
  2502. file = s->to_dst_file;
  2503. s->to_dst_file = NULL;
  2504. qemu_mutex_unlock(&s->qemu_file_lock);
  2505. qemu_file_shutdown(file);
  2506. qemu_fclose(file);
  2507. error_report("Detected IO failure for postcopy. "
  2508. "Migration paused.");
  2509. /*
  2510. * We wait until things fixed up. Then someone will setup the
  2511. * status back for us.
  2512. */
  2513. while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
  2514. qemu_sem_wait(&s->postcopy_pause_sem);
  2515. }
  2516. if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
  2517. /* Woken up by a recover procedure. Give it a shot */
  2518. /*
  2519. * Firstly, let's wake up the return path now, with a new
  2520. * return path channel.
  2521. */
  2522. qemu_sem_post(&s->postcopy_pause_rp_sem);
  2523. /* Do the resume logic */
  2524. if (postcopy_do_resume(s) == 0) {
  2525. /* Let's continue! */
  2526. trace_postcopy_pause_continued();
  2527. return MIG_THR_ERR_RECOVERED;
  2528. } else {
  2529. /*
  2530. * Something wrong happened during the recovery, let's
  2531. * pause again. Pause is always better than throwing
  2532. * data away.
  2533. */
  2534. continue;
  2535. }
  2536. } else {
  2537. /* This is not right... Time to quit. */
  2538. return MIG_THR_ERR_FATAL;
  2539. }
  2540. }
  2541. }
  2542. static MigThrError migration_detect_error(MigrationState *s)
  2543. {
  2544. int ret;
  2545. int state = s->state;
  2546. Error *local_error = NULL;
  2547. if (state == MIGRATION_STATUS_CANCELLING ||
  2548. state == MIGRATION_STATUS_CANCELLED) {
  2549. /* End the migration, but don't set the state to failed */
  2550. return MIG_THR_ERR_FATAL;
  2551. }
  2552. /* Try to detect any file errors */
  2553. ret = qemu_file_get_error_obj(s->to_dst_file, &local_error);
  2554. if (!ret) {
  2555. /* Everything is fine */
  2556. assert(!local_error);
  2557. return MIG_THR_ERR_NONE;
  2558. }
  2559. if (local_error) {
  2560. migrate_set_error(s, local_error);
  2561. error_free(local_error);
  2562. }
  2563. if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
  2564. /*
  2565. * For postcopy, we allow the network to be down for a
  2566. * while. After that, it can be continued by a
  2567. * recovery phase.
  2568. */
  2569. return postcopy_pause(s);
  2570. } else {
  2571. /*
  2572. * For precopy (or postcopy with error outside IO), we fail
  2573. * with no time.
  2574. */
  2575. migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
  2576. trace_migration_thread_file_err();
  2577. /* Time to stop the migration, now. */
  2578. return MIG_THR_ERR_FATAL;
  2579. }
  2580. }
  2581. /* How many bytes have we transferred since the beginning of the migration */
  2582. static uint64_t migration_total_bytes(MigrationState *s)
  2583. {
  2584. return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes;
  2585. }
  2586. static void migration_calculate_complete(MigrationState *s)
  2587. {
  2588. uint64_t bytes = migration_total_bytes(s);
  2589. int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2590. int64_t transfer_time;
  2591. s->total_time = end_time - s->start_time;
  2592. if (!s->downtime) {
  2593. /*
  2594. * It's still not set, so we are precopy migration. For
  2595. * postcopy, downtime is calculated during postcopy_start().
  2596. */
  2597. s->downtime = end_time - s->downtime_start;
  2598. }
  2599. transfer_time = s->total_time - s->setup_time;
  2600. if (transfer_time) {
  2601. s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
  2602. }
  2603. }
  2604. static void update_iteration_initial_status(MigrationState *s)
  2605. {
  2606. /*
  2607. * Update these three fields at the same time to avoid mismatch info lead
  2608. * wrong speed calculation.
  2609. */
  2610. s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2611. s->iteration_initial_bytes = migration_total_bytes(s);
  2612. s->iteration_initial_pages = ram_get_total_transferred_pages();
  2613. }
  2614. static void migration_update_counters(MigrationState *s,
  2615. int64_t current_time)
  2616. {
  2617. uint64_t transferred, transferred_pages, time_spent;
  2618. uint64_t current_bytes; /* bytes transferred since the beginning */
  2619. double bandwidth;
  2620. if (current_time < s->iteration_start_time + BUFFER_DELAY) {
  2621. return;
  2622. }
  2623. current_bytes = migration_total_bytes(s);
  2624. transferred = current_bytes - s->iteration_initial_bytes;
  2625. time_spent = current_time - s->iteration_start_time;
  2626. bandwidth = (double)transferred / time_spent;
  2627. s->threshold_size = bandwidth * s->parameters.downtime_limit;
  2628. s->mbps = (((double) transferred * 8.0) /
  2629. ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
  2630. transferred_pages = ram_get_total_transferred_pages() -
  2631. s->iteration_initial_pages;
  2632. s->pages_per_second = (double) transferred_pages /
  2633. (((double) time_spent / 1000.0));
  2634. /*
  2635. * if we haven't sent anything, we don't want to
  2636. * recalculate. 10000 is a small enough number for our purposes
  2637. */
  2638. if (ram_counters.dirty_pages_rate && transferred > 10000) {
  2639. s->expected_downtime = ram_counters.remaining / bandwidth;
  2640. }
  2641. qemu_file_reset_rate_limit(s->to_dst_file);
  2642. update_iteration_initial_status(s);
  2643. trace_migrate_transferred(transferred, time_spent,
  2644. bandwidth, s->threshold_size);
  2645. }
  /* What the migration thread should do after one iteration step */
  typedef enum {
      /* Carry on with the rest of the current loop iteration */
      MIG_ITERATE_RESUME,
      /* Skip the rest of the current loop iteration */
      MIG_ITERATE_SKIP,
      /* Leave the loop */
      MIG_ITERATE_BREAK,
  } MigIterateState;
  2652. /*
  2653. * Return true if continue to the next iteration directly, false
  2654. * otherwise.
  2655. */
  2656. static MigIterateState migration_iteration_run(MigrationState *s)
  2657. {
  2658. uint64_t pending_size, pend_pre, pend_compat, pend_post;
  2659. bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
  2660. qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
  2661. &pend_compat, &pend_post);
  2662. pending_size = pend_pre + pend_compat + pend_post;
  2663. trace_migrate_pending(pending_size, s->threshold_size,
  2664. pend_pre, pend_compat, pend_post);
  2665. if (pending_size && pending_size >= s->threshold_size) {
  2666. /* Still a significant amount to transfer */
  2667. if (!in_postcopy && pend_pre <= s->threshold_size &&
  2668. atomic_read(&s->start_postcopy)) {
  2669. if (postcopy_start(s)) {
  2670. error_report("%s: postcopy failed to start", __func__);
  2671. }
  2672. return MIG_ITERATE_SKIP;
  2673. }
  2674. /* Just another iteration step */
  2675. qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
  2676. } else {
  2677. trace_migration_thread_low_pending(pending_size);
  2678. migration_completion(s);
  2679. return MIG_ITERATE_BREAK;
  2680. }
  2681. return MIG_ITERATE_RESUME;
  2682. }
  2683. static void migration_iteration_finish(MigrationState *s)
  2684. {
  2685. /* If we enabled cpu throttling for auto-converge, turn it off. */
  2686. cpu_throttle_stop();
  2687. qemu_mutex_lock_iothread();
  2688. switch (s->state) {
  2689. case MIGRATION_STATUS_COMPLETED:
  2690. migration_calculate_complete(s);
  2691. runstate_set(RUN_STATE_POSTMIGRATE);
  2692. break;
  2693. case MIGRATION_STATUS_ACTIVE:
  2694. /*
  2695. * We should really assert here, but since it's during
  2696. * migration, let's try to reduce the usage of assertions.
  2697. */
  2698. if (!migrate_colo_enabled()) {
  2699. error_report("%s: critical error: calling COLO code without "
  2700. "COLO enabled", __func__);
  2701. }
  2702. migrate_start_colo_process(s);
  2703. /*
  2704. * Fixme: we will run VM in COLO no matter its old running state.
  2705. * After exited COLO, we will keep running.
  2706. */
  2707. s->vm_was_running = true;
  2708. /* Fallthrough */
  2709. case MIGRATION_STATUS_FAILED:
  2710. case MIGRATION_STATUS_CANCELLED:
  2711. case MIGRATION_STATUS_CANCELLING:
  2712. if (s->vm_was_running) {
  2713. vm_start();
  2714. } else {
  2715. if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
  2716. runstate_set(RUN_STATE_POSTMIGRATE);
  2717. }
  2718. }
  2719. break;
  2720. default:
  2721. /* Should not reach here, but if so, forgive the VM. */
  2722. error_report("%s: Unknown ending state %d", __func__, s->state);
  2723. break;
  2724. }
  2725. migrate_fd_cleanup_schedule(s);
  2726. qemu_mutex_unlock_iothread();
  2727. }
  2728. void migration_make_urgent_request(void)
  2729. {
  2730. qemu_sem_post(&migrate_get_current()->rate_limit_sem);
  2731. }
  2732. void migration_consume_urgent_request(void)
  2733. {
  2734. qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
  2735. }
  2736. /*
  2737. * Master migration thread on the source VM.
  2738. * It drives the migration and pumps the data down the outgoing channel.
  2739. */
  2740. static void *migration_thread(void *opaque)
  2741. {
  2742. MigrationState *s = opaque;
  2743. int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
  2744. MigThrError thr_error;
  2745. bool urgent = false;
  2746. rcu_register_thread();
  2747. object_ref(OBJECT(s));
  2748. update_iteration_initial_status(s);
  2749. qemu_savevm_state_header(s->to_dst_file);
  2750. /*
  2751. * If we opened the return path, we need to make sure dst has it
  2752. * opened as well.
  2753. */
  2754. if (s->rp_state.from_dst_file) {
  2755. /* Now tell the dest that it should open its end so it can reply */
  2756. qemu_savevm_send_open_return_path(s->to_dst_file);
  2757. /* And do a ping that will make stuff easier to debug */
  2758. qemu_savevm_send_ping(s->to_dst_file, 1);
  2759. }
  2760. if (migrate_postcopy()) {
  2761. /*
  2762. * Tell the destination that we *might* want to do postcopy later;
  2763. * if the other end can't do postcopy it should fail now, nice and
  2764. * early.
  2765. */
  2766. qemu_savevm_send_postcopy_advise(s->to_dst_file);
  2767. }
  2768. if (migrate_colo_enabled()) {
  2769. /* Notify migration destination that we enable COLO */
  2770. qemu_savevm_send_colo_enable(s->to_dst_file);
  2771. }
  2772. qemu_savevm_state_setup(s->to_dst_file);
  2773. if (qemu_savevm_nr_failover_devices()) {
  2774. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  2775. MIGRATION_STATUS_WAIT_UNPLUG);
  2776. while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
  2777. qemu_savevm_state_guest_unplug_pending()) {
  2778. qemu_sem_timedwait(&s->wait_unplug_sem, 250);
  2779. }
  2780. migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
  2781. MIGRATION_STATUS_ACTIVE);
  2782. }
  2783. s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
  2784. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  2785. MIGRATION_STATUS_ACTIVE);
  2786. trace_migration_thread_setup_complete();
  2787. while (migration_is_active(s)) {
  2788. int64_t current_time;
  2789. if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
  2790. MigIterateState iter_state = migration_iteration_run(s);
  2791. if (iter_state == MIG_ITERATE_SKIP) {
  2792. continue;
  2793. } else if (iter_state == MIG_ITERATE_BREAK) {
  2794. break;
  2795. }
  2796. }
  2797. /*
  2798. * Try to detect any kind of failures, and see whether we
  2799. * should stop the migration now.
  2800. */
  2801. thr_error = migration_detect_error(s);
  2802. if (thr_error == MIG_THR_ERR_FATAL) {
  2803. /* Stop migration */
  2804. break;
  2805. } else if (thr_error == MIG_THR_ERR_RECOVERED) {
  2806. /*
  2807. * Just recovered from a e.g. network failure, reset all
  2808. * the local variables. This is important to avoid
  2809. * breaking transferred_bytes and bandwidth calculation
  2810. */
  2811. update_iteration_initial_status(s);
  2812. }
  2813. current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2814. migration_update_counters(s, current_time);
  2815. urgent = false;
  2816. if (qemu_file_rate_limit(s->to_dst_file)) {
  2817. /* Wait for a delay to do rate limiting OR
  2818. * something urgent to post the semaphore.
  2819. */
  2820. int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
  2821. trace_migration_thread_ratelimit_pre(ms);
  2822. if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
  2823. /* We were worken by one or more urgent things but
  2824. * the timedwait will have consumed one of them.
  2825. * The service routine for the urgent wake will dec
  2826. * the semaphore itself for each item it consumes,
  2827. * so add this one we just eat back.
  2828. */
  2829. qemu_sem_post(&s->rate_limit_sem);
  2830. urgent = true;
  2831. }
  2832. trace_migration_thread_ratelimit_post(urgent);
  2833. }
  2834. }
  2835. trace_migration_thread_after_loop();
  2836. migration_iteration_finish(s);
  2837. object_unref(OBJECT(s));
  2838. rcu_unregister_thread();
  2839. return NULL;
  2840. }
  2841. void migrate_fd_connect(MigrationState *s, Error *error_in)
  2842. {
  2843. int64_t rate_limit;
  2844. bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
  2845. s->expected_downtime = s->parameters.downtime_limit;
  2846. s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
  2847. if (error_in) {
  2848. migrate_fd_error(s, error_in);
  2849. migrate_fd_cleanup(s);
  2850. return;
  2851. }
  2852. if (resume) {
  2853. /* This is a resumed migration */
  2854. rate_limit = s->parameters.max_postcopy_bandwidth /
  2855. XFER_LIMIT_RATIO;
  2856. } else {
  2857. /* This is a fresh new migration */
  2858. rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
  2859. /* Notify before starting migration thread */
  2860. notifier_list_notify(&migration_state_notifiers, s);
  2861. }
  2862. qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
  2863. qemu_file_set_blocking(s->to_dst_file, true);
  2864. /*
  2865. * Open the return path. For postcopy, it is used exclusively. For
  2866. * precopy, only if user specified "return-path" capability would
  2867. * QEMU uses the return path.
  2868. */
  2869. if (migrate_postcopy_ram() || migrate_use_return_path()) {
  2870. if (open_return_path_on_source(s, !resume)) {
  2871. error_report("Unable to open return-path for postcopy");
  2872. migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
  2873. migrate_fd_cleanup(s);
  2874. return;
  2875. }
  2876. }
  2877. if (resume) {
  2878. /* Wakeup the main migration thread to do the recovery */
  2879. migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
  2880. MIGRATION_STATUS_POSTCOPY_RECOVER);
  2881. qemu_sem_post(&s->postcopy_pause_sem);
  2882. return;
  2883. }
  2884. if (multifd_save_setup() != 0) {
  2885. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  2886. MIGRATION_STATUS_FAILED);
  2887. migrate_fd_cleanup(s);
  2888. return;
  2889. }
  2890. qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
  2891. QEMU_THREAD_JOINABLE);
  2892. s->migration_thread_running = true;
  2893. }
  2894. void migration_global_dump(Monitor *mon)
  2895. {
  2896. MigrationState *ms = migrate_get_current();
  2897. monitor_printf(mon, "globals:\n");
  2898. monitor_printf(mon, "store-global-state: %s\n",
  2899. ms->store_global_state ? "on" : "off");
  2900. monitor_printf(mon, "only-migratable: %s\n",
  2901. only_migratable ? "on" : "off");
  2902. monitor_printf(mon, "send-configuration: %s\n",
  2903. ms->send_configuration ? "on" : "off");
  2904. monitor_printf(mon, "send-section-footer: %s\n",
  2905. ms->send_section_footer ? "on" : "off");
  2906. monitor_printf(mon, "decompress-error-check: %s\n",
  2907. ms->decompress_error_check ? "on" : "off");
  2908. monitor_printf(mon, "clear-bitmap-shift: %u\n",
  2909. ms->clear_bitmap_shift);
  2910. }
  2911. #define DEFINE_PROP_MIG_CAP(name, x) \
  2912. DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
  2913. static Property migration_properties[] = {
  2914. DEFINE_PROP_BOOL("store-global-state", MigrationState,
  2915. store_global_state, true),
  2916. DEFINE_PROP_BOOL("send-configuration", MigrationState,
  2917. send_configuration, true),
  2918. DEFINE_PROP_BOOL("send-section-footer", MigrationState,
  2919. send_section_footer, true),
  2920. DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
  2921. decompress_error_check, true),
  2922. DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
  2923. clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
  2924. /* Migration parameters */
  2925. DEFINE_PROP_UINT8("x-compress-level", MigrationState,
  2926. parameters.compress_level,
  2927. DEFAULT_MIGRATE_COMPRESS_LEVEL),
  2928. DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
  2929. parameters.compress_threads,
  2930. DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
  2931. DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
  2932. parameters.compress_wait_thread, true),
  2933. DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
  2934. parameters.decompress_threads,
  2935. DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
  2936. DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
  2937. parameters.cpu_throttle_initial,
  2938. DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
  2939. DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
  2940. parameters.cpu_throttle_increment,
  2941. DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
  2942. DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
  2943. parameters.max_bandwidth, MAX_THROTTLE),
  2944. DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
  2945. parameters.downtime_limit,
  2946. DEFAULT_MIGRATE_SET_DOWNTIME),
  2947. DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
  2948. parameters.x_checkpoint_delay,
  2949. DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
  2950. DEFINE_PROP_UINT8("multifd-channels", MigrationState,
  2951. parameters.multifd_channels,
  2952. DEFAULT_MIGRATE_MULTIFD_CHANNELS),
  2953. DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
  2954. parameters.xbzrle_cache_size,
  2955. DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
  2956. DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
  2957. parameters.max_postcopy_bandwidth,
  2958. DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
  2959. DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
  2960. parameters.max_cpu_throttle,
  2961. DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
  2962. DEFINE_PROP_SIZE("announce-initial", MigrationState,
  2963. parameters.announce_initial,
  2964. DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
  2965. DEFINE_PROP_SIZE("announce-max", MigrationState,
  2966. parameters.announce_max,
  2967. DEFAULT_MIGRATE_ANNOUNCE_MAX),
  2968. DEFINE_PROP_SIZE("announce-rounds", MigrationState,
  2969. parameters.announce_rounds,
  2970. DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
  2971. DEFINE_PROP_SIZE("announce-step", MigrationState,
  2972. parameters.announce_step,
  2973. DEFAULT_MIGRATE_ANNOUNCE_STEP),
  2974. /* Migration capabilities */
  2975. DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
  2976. DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
  2977. DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
  2978. DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
  2979. DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
  2980. DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
  2981. DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
  2982. DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
  2983. DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
  2984. DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
  2985. DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
  2986. DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
  2987. DEFINE_PROP_END_OF_LIST(),
  2988. };
  2989. static void migration_class_init(ObjectClass *klass, void *data)
  2990. {
  2991. DeviceClass *dc = DEVICE_CLASS(klass);
  2992. dc->user_creatable = false;
  2993. dc->props = migration_properties;
  2994. }
  2995. static void migration_instance_finalize(Object *obj)
  2996. {
  2997. MigrationState *ms = MIGRATION_OBJ(obj);
  2998. MigrationParameters *params = &ms->parameters;
  2999. qemu_mutex_destroy(&ms->error_mutex);
  3000. qemu_mutex_destroy(&ms->qemu_file_lock);
  3001. g_free(params->tls_hostname);
  3002. g_free(params->tls_creds);
  3003. qemu_sem_destroy(&ms->wait_unplug_sem);
  3004. qemu_sem_destroy(&ms->rate_limit_sem);
  3005. qemu_sem_destroy(&ms->pause_sem);
  3006. qemu_sem_destroy(&ms->postcopy_pause_sem);
  3007. qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
  3008. qemu_sem_destroy(&ms->rp_state.rp_sem);
  3009. error_free(ms->error);
  3010. }
  3011. static void migration_instance_init(Object *obj)
  3012. {
  3013. MigrationState *ms = MIGRATION_OBJ(obj);
  3014. MigrationParameters *params = &ms->parameters;
  3015. ms->state = MIGRATION_STATUS_NONE;
  3016. ms->mbps = -1;
  3017. ms->pages_per_second = -1;
  3018. qemu_sem_init(&ms->pause_sem, 0);
  3019. qemu_mutex_init(&ms->error_mutex);
  3020. params->tls_hostname = g_strdup("");
  3021. params->tls_creds = g_strdup("");
  3022. /* Set has_* up only for parameter checks */
  3023. params->has_compress_level = true;
  3024. params->has_compress_threads = true;
  3025. params->has_decompress_threads = true;
  3026. params->has_cpu_throttle_initial = true;
  3027. params->has_cpu_throttle_increment = true;
  3028. params->has_max_bandwidth = true;
  3029. params->has_downtime_limit = true;
  3030. params->has_x_checkpoint_delay = true;
  3031. params->has_block_incremental = true;
  3032. params->has_multifd_channels = true;
  3033. params->has_xbzrle_cache_size = true;
  3034. params->has_max_postcopy_bandwidth = true;
  3035. params->has_max_cpu_throttle = true;
  3036. params->has_announce_initial = true;
  3037. params->has_announce_max = true;
  3038. params->has_announce_rounds = true;
  3039. params->has_announce_step = true;
  3040. qemu_sem_init(&ms->postcopy_pause_sem, 0);
  3041. qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
  3042. qemu_sem_init(&ms->rp_state.rp_sem, 0);
  3043. qemu_sem_init(&ms->rate_limit_sem, 0);
  3044. qemu_sem_init(&ms->wait_unplug_sem, 0);
  3045. qemu_mutex_init(&ms->qemu_file_lock);
  3046. }
  3047. /*
  3048. * Return true if check pass, false otherwise. Error will be put
  3049. * inside errp if provided.
  3050. */
  3051. static bool migration_object_check(MigrationState *ms, Error **errp)
  3052. {
  3053. MigrationCapabilityStatusList *head = NULL;
  3054. /* Assuming all off */
  3055. bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
  3056. int i;
  3057. if (!migrate_params_check(&ms->parameters, errp)) {
  3058. return false;
  3059. }
  3060. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  3061. if (ms->enabled_capabilities[i]) {
  3062. head = migrate_cap_add(head, i, true);
  3063. }
  3064. }
  3065. ret = migrate_caps_check(cap_list, head, errp);
  3066. /* It works with head == NULL */
  3067. qapi_free_MigrationCapabilityStatusList(head);
  3068. return ret;
  3069. }
  3070. static const TypeInfo migration_type = {
  3071. .name = TYPE_MIGRATION,
  3072. /*
  3073. * NOTE: TYPE_MIGRATION is not really a device, as the object is
  3074. * not created using qdev_create(), it is not attached to the qdev
  3075. * device tree, and it is never realized.
  3076. *
  3077. * TODO: Make this TYPE_OBJECT once QOM provides something like
  3078. * TYPE_DEVICE's "-global" properties.
  3079. */
  3080. .parent = TYPE_DEVICE,
  3081. .class_init = migration_class_init,
  3082. .class_size = sizeof(MigrationClass),
  3083. .instance_size = sizeof(MigrationState),
  3084. .instance_init = migration_instance_init,
  3085. .instance_finalize = migration_instance_finalize,
  3086. };
  3087. static void register_migration_types(void)
  3088. {
  3089. type_register_static(&migration_type);
  3090. }
  3091. type_init(register_migration_types);