colo.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941
  1. /*
  2. * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
  3. * (a.k.a. Fault Tolerance or Continuous Replication)
  4. *
  5. * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
  6. * Copyright (c) 2016 FUJITSU LIMITED
  7. * Copyright (c) 2016 Intel Corporation
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or
  10. * later. See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "sysemu/sysemu.h"
  14. #include "qapi/error.h"
  15. #include "qapi/qapi-commands-migration.h"
  16. #include "migration.h"
  17. #include "qemu-file.h"
  18. #include "savevm.h"
  19. #include "migration/colo.h"
  20. #include "block.h"
  21. #include "io/channel-buffer.h"
  22. #include "trace.h"
  23. #include "qemu/error-report.h"
  24. #include "qemu/main-loop.h"
  25. #include "qemu/rcu.h"
  26. #include "migration/failover.h"
  27. #include "migration/ram.h"
  28. #ifdef CONFIG_REPLICATION
  29. #include "block/replication.h"
  30. #endif
  31. #include "net/colo-compare.h"
  32. #include "net/colo.h"
  33. #include "block/block.h"
  34. #include "qapi/qapi-events-migration.h"
  35. #include "sysemu/cpus.h"
  36. #include "sysemu/runstate.h"
  37. #include "net/filter.h"
  38. static bool vmstate_loading;
  39. static Notifier packets_compare_notifier;
  40. /* User need to know colo mode after COLO failover */
  41. static COLOMode last_colo_mode;
  42. #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
  43. bool migration_in_colo_state(void)
  44. {
  45. MigrationState *s = migrate_get_current();
  46. return (s->state == MIGRATION_STATUS_COLO);
  47. }
  48. bool migration_incoming_in_colo_state(void)
  49. {
  50. MigrationIncomingState *mis = migration_incoming_get_current();
  51. return mis && (mis->state == MIGRATION_STATUS_COLO);
  52. }
  53. static bool colo_runstate_is_stopped(void)
  54. {
  55. return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
  56. }
  /*
   * Carry out failover on the Secondary side.
   *
   * If device state is currently being loaded (vmstate_loading), the failover
   * state is moved from ACTIVE to RELAUNCH and the function returns so the
   * request can be re-issued later. Otherwise the incoming migration is
   * marked completed, replication is stopped, network filters are notified,
   * the migration streams are shut down, and the COLO incoming thread and
   * incoming coroutine are woken.
   *
   * Without CONFIG_REPLICATION this aborts.
   */
  static void secondary_vm_do_failover(void)
  {
  /* COLO needs enable block-replication */
  #ifdef CONFIG_REPLICATION
      int old_state;
      MigrationIncomingState *mis = migration_incoming_get_current();
      Error *local_err = NULL;

      /* Can not do failover during the process of VM's loading VMstate, Or
       * it will break the secondary VM.
       */
      if (vmstate_loading) {
          old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                          FAILOVER_STATUS_RELAUNCH);
          if (old_state != FAILOVER_STATUS_ACTIVE) {
              /* Note the space after the comma: the literals are concatenated */
              error_report("Unknown error while do failover for secondary VM, "
                           "old_state: %s", FailoverStatus_str(old_state));
          }
          return;
      }

      migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                        MIGRATION_STATUS_COMPLETED);

      replication_stop_all(true, &local_err);
      if (local_err) {
          error_report_err(local_err);
          /* Reset so the pointer can be reused for the next call */
          local_err = NULL;
      }

      /* Notify all filters of all NIC to do checkpoint */
      colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
      if (local_err) {
          error_report_err(local_err);
      }

      if (!autostart) {
          error_report("\"-S\" qemu option will be ignored in secondary side");
          /* recover runstate to normal migration finish state */
          autostart = true;
      }
      /*
       * Make sure COLO incoming thread not block in recv or send,
       * If mis->from_src_file and mis->to_src_file use the same fd,
       * The second shutdown() will return -1, we ignore this value,
       * It is harmless.
       */
      if (mis->from_src_file) {
          qemu_file_shutdown(mis->from_src_file);
      }
      if (mis->to_src_file) {
          qemu_file_shutdown(mis->to_src_file);
      }

      old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                     FAILOVER_STATUS_COMPLETED);
      if (old_state != FAILOVER_STATUS_ACTIVE) {
          error_report("Incorrect state (%s) while doing failover for "
                       "secondary VM", FailoverStatus_str(old_state));
          return;
      }
      /* Notify COLO incoming thread that failover work is finished */
      qemu_sem_post(&mis->colo_incoming_sem);

      /* For Secondary VM, jump to incoming co */
      if (mis->migration_incoming_co) {
          qemu_coroutine_enter(mis->migration_incoming_co);
      }
  #else
      abort();
  #endif
  }
  /*
   * Carry out failover on the Primary side: mark the migration completed,
   * kick and unblock the COLO thread, stop replication and finally signal
   * colo_exit_sem. Aborts when built without CONFIG_REPLICATION.
   */
  static void primary_vm_do_failover(void)
  {
  #ifndef CONFIG_REPLICATION
      abort();
  #else
      Error *err = NULL;
      MigrationState *ms = migrate_get_current();
      int prev_state;

      migrate_set_state(&ms->state, MIGRATION_STATUS_COLO,
                        MIGRATION_STATUS_COMPLETED);

      /*
       * Kick the COLO thread, which might be waiting on
       * s->colo_checkpoint_event in colo_process_checkpoint().
       */
      colo_checkpoint_notify(ms);

      /*
       * The COLO thread may also be blocked in recv() or send(). Shut down
       * both streams; they may share one fd, in which case shutting it down
       * twice is harmless.
       */
      if (ms->to_dst_file) {
          qemu_file_shutdown(ms->to_dst_file);
      }
      if (ms->rp_state.from_dst_file) {
          qemu_file_shutdown(ms->rp_state.from_dst_file);
      }

      prev_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
                                      FAILOVER_STATUS_COMPLETED);
      if (prev_state != FAILOVER_STATUS_ACTIVE) {
          error_report("Incorrect state (%s) while doing failover for Primary VM",
                       FailoverStatus_str(prev_state));
          return;
      }

      replication_stop_all(true, &err);
      if (err) {
          error_report_err(err);
          err = NULL;
      }

      /* Tell the COLO thread that the failover work is done */
      qemu_sem_post(&ms->colo_exit_sem);
  #endif
  }
  164. COLOMode get_colo_mode(void)
  165. {
  166. if (migration_in_colo_state()) {
  167. return COLO_MODE_PRIMARY;
  168. } else if (migration_incoming_in_colo_state()) {
  169. return COLO_MODE_SECONDARY;
  170. } else {
  171. return COLO_MODE_NONE;
  172. }
  173. }
  174. void colo_do_failover(void)
  175. {
  176. /* Make sure VM stopped while failover happened. */
  177. if (!colo_runstate_is_stopped()) {
  178. vm_stop_force_state(RUN_STATE_COLO);
  179. }
  180. switch (last_colo_mode = get_colo_mode()) {
  181. case COLO_MODE_PRIMARY:
  182. primary_vm_do_failover();
  183. break;
  184. case COLO_MODE_SECONDARY:
  185. secondary_vm_do_failover();
  186. break;
  187. default:
  188. error_report("colo_do_failover failed because the colo mode"
  189. " could not be obtained");
  190. }
  191. }
  192. #ifdef CONFIG_REPLICATION
  193. void qmp_xen_set_replication(bool enable, bool primary,
  194. bool has_failover, bool failover,
  195. Error **errp)
  196. {
  197. ReplicationMode mode = primary ?
  198. REPLICATION_MODE_PRIMARY :
  199. REPLICATION_MODE_SECONDARY;
  200. if (has_failover && enable) {
  201. error_setg(errp, "Parameter 'failover' is only for"
  202. " stopping replication");
  203. return;
  204. }
  205. if (enable) {
  206. replication_start_all(mode, errp);
  207. } else {
  208. if (!has_failover) {
  209. failover = NULL;
  210. }
  211. replication_stop_all(failover, failover ? NULL : errp);
  212. }
  213. }
  214. ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
  215. {
  216. Error *err = NULL;
  217. ReplicationStatus *s = g_new0(ReplicationStatus, 1);
  218. replication_get_error_all(&err);
  219. if (err) {
  220. s->error = true;
  221. s->desc = g_strdup(error_get_pretty(err));
  222. } else {
  223. s->error = false;
  224. }
  225. error_free(err);
  226. return s;
  227. }
  228. void qmp_xen_colo_do_checkpoint(Error **errp)
  229. {
  230. Error *err = NULL;
  231. replication_do_checkpoint_all(&err);
  232. if (err) {
  233. error_propagate(errp, err);
  234. return;
  235. }
  236. /* Notify all filters of all NIC to do checkpoint */
  237. colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
  238. }
  239. #endif
  240. COLOStatus *qmp_query_colo_status(Error **errp)
  241. {
  242. COLOStatus *s = g_new0(COLOStatus, 1);
  243. s->mode = get_colo_mode();
  244. s->last_mode = last_colo_mode;
  245. switch (failover_get_state()) {
  246. case FAILOVER_STATUS_NONE:
  247. s->reason = COLO_EXIT_REASON_NONE;
  248. break;
  249. case FAILOVER_STATUS_COMPLETED:
  250. s->reason = COLO_EXIT_REASON_REQUEST;
  251. break;
  252. default:
  253. if (migration_in_colo_state()) {
  254. s->reason = COLO_EXIT_REASON_PROCESSING;
  255. } else {
  256. s->reason = COLO_EXIT_REASON_ERROR;
  257. }
  258. }
  259. return s;
  260. }
  261. static void colo_send_message(QEMUFile *f, COLOMessage msg,
  262. Error **errp)
  263. {
  264. int ret;
  265. if (msg >= COLO_MESSAGE__MAX) {
  266. error_setg(errp, "%s: Invalid message", __func__);
  267. return;
  268. }
  269. qemu_put_be32(f, msg);
  270. qemu_fflush(f);
  271. ret = qemu_file_get_error(f);
  272. if (ret < 0) {
  273. error_setg_errno(errp, -ret, "Can't send COLO message");
  274. }
  275. trace_colo_send_message(COLOMessage_str(msg));
  276. }
  277. static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
  278. uint64_t value, Error **errp)
  279. {
  280. Error *local_err = NULL;
  281. int ret;
  282. colo_send_message(f, msg, &local_err);
  283. if (local_err) {
  284. error_propagate(errp, local_err);
  285. return;
  286. }
  287. qemu_put_be64(f, value);
  288. qemu_fflush(f);
  289. ret = qemu_file_get_error(f);
  290. if (ret < 0) {
  291. error_setg_errno(errp, -ret, "Failed to send value for message:%s",
  292. COLOMessage_str(msg));
  293. }
  294. }
  295. static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
  296. {
  297. COLOMessage msg;
  298. int ret;
  299. msg = qemu_get_be32(f);
  300. ret = qemu_file_get_error(f);
  301. if (ret < 0) {
  302. error_setg_errno(errp, -ret, "Can't receive COLO message");
  303. return msg;
  304. }
  305. if (msg >= COLO_MESSAGE__MAX) {
  306. error_setg(errp, "%s: Invalid message", __func__);
  307. return msg;
  308. }
  309. trace_colo_receive_message(COLOMessage_str(msg));
  310. return msg;
  311. }
  312. static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
  313. Error **errp)
  314. {
  315. COLOMessage msg;
  316. Error *local_err = NULL;
  317. msg = colo_receive_message(f, &local_err);
  318. if (local_err) {
  319. error_propagate(errp, local_err);
  320. return;
  321. }
  322. if (msg != expect_msg) {
  323. error_setg(errp, "Unexpected COLO message %d, expected %d",
  324. msg, expect_msg);
  325. }
  326. }
  327. static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
  328. Error **errp)
  329. {
  330. Error *local_err = NULL;
  331. uint64_t value;
  332. int ret;
  333. colo_receive_check_message(f, expect_msg, &local_err);
  334. if (local_err) {
  335. error_propagate(errp, local_err);
  336. return 0;
  337. }
  338. value = qemu_get_be64(f);
  339. ret = qemu_file_get_error(f);
  340. if (ret < 0) {
  341. error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
  342. COLOMessage_str(expect_msg));
  343. }
  344. return value;
  345. }
  346. static int colo_do_checkpoint_transaction(MigrationState *s,
  347. QIOChannelBuffer *bioc,
  348. QEMUFile *fb)
  349. {
  350. Error *local_err = NULL;
  351. int ret = -1;
  352. colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
  353. &local_err);
  354. if (local_err) {
  355. goto out;
  356. }
  357. colo_receive_check_message(s->rp_state.from_dst_file,
  358. COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
  359. if (local_err) {
  360. goto out;
  361. }
  362. /* Reset channel-buffer directly */
  363. qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
  364. bioc->usage = 0;
  365. qemu_mutex_lock_iothread();
  366. if (failover_get_state() != FAILOVER_STATUS_NONE) {
  367. qemu_mutex_unlock_iothread();
  368. goto out;
  369. }
  370. vm_stop_force_state(RUN_STATE_COLO);
  371. qemu_mutex_unlock_iothread();
  372. trace_colo_vm_state_change("run", "stop");
  373. /*
  374. * Failover request bh could be called after vm_stop_force_state(),
  375. * So we need check failover_request_is_active() again.
  376. */
  377. if (failover_get_state() != FAILOVER_STATUS_NONE) {
  378. goto out;
  379. }
  380. qemu_mutex_lock_iothread();
  381. #ifdef CONFIG_REPLICATION
  382. replication_do_checkpoint_all(&local_err);
  383. if (local_err) {
  384. qemu_mutex_unlock_iothread();
  385. goto out;
  386. }
  387. #else
  388. abort();
  389. #endif
  390. colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
  391. if (local_err) {
  392. qemu_mutex_unlock_iothread();
  393. goto out;
  394. }
  395. /* Note: device state is saved into buffer */
  396. ret = qemu_save_device_state(fb);
  397. qemu_mutex_unlock_iothread();
  398. if (ret < 0) {
  399. goto out;
  400. }
  401. if (migrate_auto_converge()) {
  402. mig_throttle_counter_reset();
  403. }
  404. /*
  405. * Only save VM's live state, which not including device state.
  406. * TODO: We may need a timeout mechanism to prevent COLO process
  407. * to be blocked here.
  408. */
  409. qemu_savevm_live_state(s->to_dst_file);
  410. qemu_fflush(fb);
  411. /*
  412. * We need the size of the VMstate data in Secondary side,
  413. * With which we can decide how much data should be read.
  414. */
  415. colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
  416. bioc->usage, &local_err);
  417. if (local_err) {
  418. goto out;
  419. }
  420. qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
  421. qemu_fflush(s->to_dst_file);
  422. ret = qemu_file_get_error(s->to_dst_file);
  423. if (ret < 0) {
  424. goto out;
  425. }
  426. colo_receive_check_message(s->rp_state.from_dst_file,
  427. COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
  428. if (local_err) {
  429. goto out;
  430. }
  431. qemu_event_reset(&s->colo_checkpoint_event);
  432. colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
  433. if (local_err) {
  434. goto out;
  435. }
  436. colo_receive_check_message(s->rp_state.from_dst_file,
  437. COLO_MESSAGE_VMSTATE_LOADED, &local_err);
  438. if (local_err) {
  439. goto out;
  440. }
  441. ret = 0;
  442. qemu_mutex_lock_iothread();
  443. vm_start();
  444. qemu_mutex_unlock_iothread();
  445. trace_colo_vm_state_change("stop", "run");
  446. out:
  447. if (local_err) {
  448. error_report_err(local_err);
  449. }
  450. return ret;
  451. }
  452. static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
  453. {
  454. colo_checkpoint_notify(data);
  455. }
  456. static void colo_process_checkpoint(MigrationState *s)
  457. {
  458. QIOChannelBuffer *bioc;
  459. QEMUFile *fb = NULL;
  460. Error *local_err = NULL;
  461. int ret;
  462. if (get_colo_mode() != COLO_MODE_PRIMARY) {
  463. error_report("COLO mode must be COLO_MODE_PRIMARY");
  464. return;
  465. }
  466. failover_init_state();
  467. s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
  468. if (!s->rp_state.from_dst_file) {
  469. error_report("Open QEMUFile from_dst_file failed");
  470. goto out;
  471. }
  472. packets_compare_notifier.notify = colo_compare_notify_checkpoint;
  473. colo_compare_register_notifier(&packets_compare_notifier);
  474. /*
  475. * Wait for Secondary finish loading VM states and enter COLO
  476. * restore.
  477. */
  478. colo_receive_check_message(s->rp_state.from_dst_file,
  479. COLO_MESSAGE_CHECKPOINT_READY, &local_err);
  480. if (local_err) {
  481. goto out;
  482. }
  483. bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
  484. fb = qemu_file_new_output(QIO_CHANNEL(bioc));
  485. object_unref(OBJECT(bioc));
  486. qemu_mutex_lock_iothread();
  487. #ifdef CONFIG_REPLICATION
  488. replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
  489. if (local_err) {
  490. qemu_mutex_unlock_iothread();
  491. goto out;
  492. }
  493. #else
  494. abort();
  495. #endif
  496. vm_start();
  497. qemu_mutex_unlock_iothread();
  498. trace_colo_vm_state_change("stop", "run");
  499. timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) +
  500. s->parameters.x_checkpoint_delay);
  501. while (s->state == MIGRATION_STATUS_COLO) {
  502. if (failover_get_state() != FAILOVER_STATUS_NONE) {
  503. error_report("failover request");
  504. goto out;
  505. }
  506. qemu_event_wait(&s->colo_checkpoint_event);
  507. if (s->state != MIGRATION_STATUS_COLO) {
  508. goto out;
  509. }
  510. ret = colo_do_checkpoint_transaction(s, bioc, fb);
  511. if (ret < 0) {
  512. goto out;
  513. }
  514. }
  515. out:
  516. /* Throw the unreported error message after exited from loop */
  517. if (local_err) {
  518. error_report_err(local_err);
  519. }
  520. if (fb) {
  521. qemu_fclose(fb);
  522. }
  523. /*
  524. * There are only two reasons we can get here, some error happened
  525. * or the user triggered failover.
  526. */
  527. switch (failover_get_state()) {
  528. case FAILOVER_STATUS_COMPLETED:
  529. qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
  530. COLO_EXIT_REASON_REQUEST);
  531. break;
  532. default:
  533. qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
  534. COLO_EXIT_REASON_ERROR);
  535. }
  536. /* Hope this not to be too long to wait here */
  537. qemu_sem_wait(&s->colo_exit_sem);
  538. qemu_sem_destroy(&s->colo_exit_sem);
  539. /*
  540. * It is safe to unregister notifier after failover finished.
  541. * Besides, colo_delay_timer and colo_checkpoint_sem can't be
  542. * released before unregister notifier, or there will be use-after-free
  543. * error.
  544. */
  545. colo_compare_unregister_notifier(&packets_compare_notifier);
  546. timer_free(s->colo_delay_timer);
  547. qemu_event_destroy(&s->colo_checkpoint_event);
  548. /*
  549. * Must be called after failover BH is completed,
  550. * Or the failover BH may shutdown the wrong fd that
  551. * re-used by other threads after we release here.
  552. */
  553. if (s->rp_state.from_dst_file) {
  554. qemu_fclose(s->rp_state.from_dst_file);
  555. s->rp_state.from_dst_file = NULL;
  556. }
  557. }
  558. void colo_checkpoint_notify(void *opaque)
  559. {
  560. MigrationState *s = opaque;
  561. int64_t next_notify_time;
  562. qemu_event_set(&s->colo_checkpoint_event);
  563. s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
  564. next_notify_time = s->colo_checkpoint_time +
  565. s->parameters.x_checkpoint_delay;
  566. timer_mod(s->colo_delay_timer, next_notify_time);
  567. }
  568. void migrate_start_colo_process(MigrationState *s)
  569. {
  570. qemu_mutex_unlock_iothread();
  571. qemu_event_init(&s->colo_checkpoint_event, false);
  572. s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST,
  573. colo_checkpoint_notify, s);
  574. qemu_sem_init(&s->colo_exit_sem, 0);
  575. colo_process_checkpoint(s);
  576. qemu_mutex_lock_iothread();
  577. }
  578. static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
  579. QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
  580. {
  581. uint64_t total_size;
  582. uint64_t value;
  583. Error *local_err = NULL;
  584. int ret;
  585. qemu_mutex_lock_iothread();
  586. vm_stop_force_state(RUN_STATE_COLO);
  587. qemu_mutex_unlock_iothread();
  588. trace_colo_vm_state_change("run", "stop");
  589. /* FIXME: This is unnecessary for periodic checkpoint mode */
  590. colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
  591. &local_err);
  592. if (local_err) {
  593. error_propagate(errp, local_err);
  594. return;
  595. }
  596. colo_receive_check_message(mis->from_src_file,
  597. COLO_MESSAGE_VMSTATE_SEND, &local_err);
  598. if (local_err) {
  599. error_propagate(errp, local_err);
  600. return;
  601. }
  602. qemu_mutex_lock_iothread();
  603. cpu_synchronize_all_states();
  604. ret = qemu_loadvm_state_main(mis->from_src_file, mis);
  605. qemu_mutex_unlock_iothread();
  606. if (ret < 0) {
  607. error_setg(errp, "Load VM's live state (ram) error");
  608. return;
  609. }
  610. value = colo_receive_message_value(mis->from_src_file,
  611. COLO_MESSAGE_VMSTATE_SIZE, &local_err);
  612. if (local_err) {
  613. error_propagate(errp, local_err);
  614. return;
  615. }
  616. /*
  617. * Read VM device state data into channel buffer,
  618. * It's better to re-use the memory allocated.
  619. * Here we need to handle the channel buffer directly.
  620. */
  621. if (value > bioc->capacity) {
  622. bioc->capacity = value;
  623. bioc->data = g_realloc(bioc->data, bioc->capacity);
  624. }
  625. total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
  626. if (total_size != value) {
  627. error_setg(errp, "Got %" PRIu64 " VMState data, less than expected"
  628. " %" PRIu64, total_size, value);
  629. return;
  630. }
  631. bioc->usage = total_size;
  632. qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
  633. colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
  634. &local_err);
  635. if (local_err) {
  636. error_propagate(errp, local_err);
  637. return;
  638. }
  639. qemu_mutex_lock_iothread();
  640. vmstate_loading = true;
  641. colo_flush_ram_cache();
  642. ret = qemu_load_device_state(fb);
  643. if (ret < 0) {
  644. error_setg(errp, "COLO: load device state failed");
  645. vmstate_loading = false;
  646. qemu_mutex_unlock_iothread();
  647. return;
  648. }
  649. #ifdef CONFIG_REPLICATION
  650. replication_get_error_all(&local_err);
  651. if (local_err) {
  652. error_propagate(errp, local_err);
  653. vmstate_loading = false;
  654. qemu_mutex_unlock_iothread();
  655. return;
  656. }
  657. /* discard colo disk buffer */
  658. replication_do_checkpoint_all(&local_err);
  659. if (local_err) {
  660. error_propagate(errp, local_err);
  661. vmstate_loading = false;
  662. qemu_mutex_unlock_iothread();
  663. return;
  664. }
  665. #else
  666. abort();
  667. #endif
  668. /* Notify all filters of all NIC to do checkpoint */
  669. colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
  670. if (local_err) {
  671. error_propagate(errp, local_err);
  672. vmstate_loading = false;
  673. qemu_mutex_unlock_iothread();
  674. return;
  675. }
  676. vmstate_loading = false;
  677. vm_start();
  678. qemu_mutex_unlock_iothread();
  679. trace_colo_vm_state_change("stop", "run");
  680. if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
  681. return;
  682. }
  683. colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
  684. &local_err);
  685. error_propagate(errp, local_err);
  686. }
  687. static void colo_wait_handle_message(MigrationIncomingState *mis,
  688. QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
  689. {
  690. COLOMessage msg;
  691. Error *local_err = NULL;
  692. msg = colo_receive_message(mis->from_src_file, &local_err);
  693. if (local_err) {
  694. error_propagate(errp, local_err);
  695. return;
  696. }
  697. switch (msg) {
  698. case COLO_MESSAGE_CHECKPOINT_REQUEST:
  699. colo_incoming_process_checkpoint(mis, fb, bioc, errp);
  700. break;
  701. default:
  702. error_setg(errp, "Got unknown COLO message: %d", msg);
  703. break;
  704. }
  705. }
  706. void colo_shutdown(void)
  707. {
  708. MigrationIncomingState *mis = NULL;
  709. MigrationState *s = NULL;
  710. switch (get_colo_mode()) {
  711. case COLO_MODE_PRIMARY:
  712. s = migrate_get_current();
  713. qemu_event_set(&s->colo_checkpoint_event);
  714. qemu_sem_post(&s->colo_exit_sem);
  715. break;
  716. case COLO_MODE_SECONDARY:
  717. mis = migration_incoming_get_current();
  718. qemu_sem_post(&mis->colo_incoming_sem);
  719. break;
  720. default:
  721. break;
  722. }
  723. }
  724. void *colo_process_incoming_thread(void *opaque)
  725. {
  726. MigrationIncomingState *mis = opaque;
  727. QEMUFile *fb = NULL;
  728. QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
  729. Error *local_err = NULL;
  730. rcu_register_thread();
  731. qemu_sem_init(&mis->colo_incoming_sem, 0);
  732. migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
  733. MIGRATION_STATUS_COLO);
  734. if (get_colo_mode() != COLO_MODE_SECONDARY) {
  735. error_report("COLO mode must be COLO_MODE_SECONDARY");
  736. return NULL;
  737. }
  738. failover_init_state();
  739. mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
  740. if (!mis->to_src_file) {
  741. error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
  742. goto out;
  743. }
  744. /*
  745. * Note: the communication between Primary side and Secondary side
  746. * should be sequential, we set the fd to unblocked in migration incoming
  747. * coroutine, and here we are in the COLO incoming thread, so it is ok to
  748. * set the fd back to blocked.
  749. */
  750. qemu_file_set_blocking(mis->from_src_file, true);
  751. colo_incoming_start_dirty_log();
  752. bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
  753. fb = qemu_file_new_input(QIO_CHANNEL(bioc));
  754. object_unref(OBJECT(bioc));
  755. qemu_mutex_lock_iothread();
  756. #ifdef CONFIG_REPLICATION
  757. replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
  758. if (local_err) {
  759. qemu_mutex_unlock_iothread();
  760. goto out;
  761. }
  762. #else
  763. abort();
  764. #endif
  765. vm_start();
  766. qemu_mutex_unlock_iothread();
  767. trace_colo_vm_state_change("stop", "run");
  768. colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
  769. &local_err);
  770. if (local_err) {
  771. goto out;
  772. }
  773. while (mis->state == MIGRATION_STATUS_COLO) {
  774. colo_wait_handle_message(mis, fb, bioc, &local_err);
  775. if (local_err) {
  776. error_report_err(local_err);
  777. break;
  778. }
  779. if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
  780. failover_set_state(FAILOVER_STATUS_RELAUNCH,
  781. FAILOVER_STATUS_NONE);
  782. failover_request_active(NULL);
  783. break;
  784. }
  785. if (failover_get_state() != FAILOVER_STATUS_NONE) {
  786. error_report("failover request");
  787. break;
  788. }
  789. }
  790. out:
  791. /*
  792. * There are only two reasons we can get here, some error happened
  793. * or the user triggered failover.
  794. */
  795. switch (failover_get_state()) {
  796. case FAILOVER_STATUS_COMPLETED:
  797. qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
  798. COLO_EXIT_REASON_REQUEST);
  799. break;
  800. default:
  801. qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
  802. COLO_EXIT_REASON_ERROR);
  803. }
  804. if (fb) {
  805. qemu_fclose(fb);
  806. }
  807. /* Hope this not to be too long to loop here */
  808. qemu_sem_wait(&mis->colo_incoming_sem);
  809. qemu_sem_destroy(&mis->colo_incoming_sem);
  810. rcu_unregister_thread();
  811. return NULL;
  812. }