migration.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*
  2. * QEMU live migration
  3. *
  4. * Copyright IBM, Corp. 2008
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. * Contributions after 2012-01-13 are licensed under the terms of the
  13. * GNU GPL, version 2 or (at your option) any later version.
  14. */
  15. #include "qemu-common.h"
  16. #include "migration.h"
  17. #include "monitor.h"
  18. #include "buffered_file.h"
  19. #include "sysemu.h"
  20. #include "block.h"
  21. #include "qemu_socket.h"
  22. #include "block-migration.h"
  23. #include "qmp-commands.h"
  /* Uncomment the following line to get migration tracing on stdout. */
  /* #define DEBUG_MIGRATION */

  /*
   * DPRINTF(fmt, ...): printf-style trace helper.
   *
   * Without DEBUG_MIGRATION the call expands to an empty statement, so its
   * arguments are neither evaluated nor type-checked.  With DEBUG_MIGRATION
   * every message is printed with a "migration: " prefix.
   */
  #ifndef DEBUG_MIGRATION
  #define DPRINTF(fmt, ...) \
      do { } while (0)
  #else
  #define DPRINTF(fmt, ...) \
      do { printf("migration: " fmt, ## __VA_ARGS__); } while (0)
  #endif
  /*
   * Values taken by MigrationState.state.  The numeric values are spelled
   * out so that they are visible at a glance; they match the implicit
   * numbering of the enumerators.
   */
  enum {
      MIG_STATE_ERROR     = 0,  /* reported as "failed" by query-migrate */
      MIG_STATE_SETUP     = 1,  /* initial state, nothing running */
      MIG_STATE_CANCELLED = 2,  /* reported as "cancelled" */
      MIG_STATE_ACTIVE    = 3,  /* outgoing migration in progress */
      MIG_STATE_COMPLETED = 4,  /* reported as "completed" */
  };
  /*
   * Migration speed throttling: initial value of
   * MigrationState.bandwidth_limit (32 * 2^20).
   * NOTE(review): presumably bytes per second - confirm against the
   * buffered-file rate limiter.
   */
  #define MAX_THROTTLE (32 * 1024 * 1024)

  /* Default size of the migration XBZRLE cache (64 * 2^20). */
  #define DEFAULT_MIGRATE_CACHE_SIZE (64 << 20)
  42. static NotifierList migration_state_notifiers =
  43. NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
  44. /* When we add fault tolerance, we could have several
  45. migrations at once. For now we don't need to add
  46. dynamic creation of migration */
  47. static MigrationState *migrate_get_current(void)
  48. {
  49. static MigrationState current_migration = {
  50. .state = MIG_STATE_SETUP,
  51. .bandwidth_limit = MAX_THROTTLE,
  52. .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
  53. };
  54. return &current_migration;
  55. }
  56. int qemu_start_incoming_migration(const char *uri, Error **errp)
  57. {
  58. const char *p;
  59. int ret;
  60. if (strstart(uri, "tcp:", &p))
  61. ret = tcp_start_incoming_migration(p, errp);
  62. #if !defined(WIN32)
  63. else if (strstart(uri, "exec:", &p))
  64. ret = exec_start_incoming_migration(p);
  65. else if (strstart(uri, "unix:", &p))
  66. ret = unix_start_incoming_migration(p);
  67. else if (strstart(uri, "fd:", &p))
  68. ret = fd_start_incoming_migration(p);
  69. #endif
  70. else {
  71. fprintf(stderr, "unknown migration protocol: %s\n", uri);
  72. ret = -EPROTONOSUPPORT;
  73. }
  74. return ret;
  75. }
  76. void process_incoming_migration(QEMUFile *f)
  77. {
  78. if (qemu_loadvm_state(f) < 0) {
  79. fprintf(stderr, "load of migration failed\n");
  80. exit(0);
  81. }
  82. qemu_announce_self();
  83. DPRINTF("successfully loaded vm state\n");
  84. bdrv_clear_incoming_migration_all();
  85. /* Make sure all file formats flush their mutable metadata */
  86. bdrv_invalidate_cache_all();
  87. if (autostart) {
  88. vm_start();
  89. } else {
  90. runstate_set(RUN_STATE_PRELAUNCH);
  91. }
  92. }
  /*
   * Longest guest downtime, in nanoseconds, that we are willing to accept
   * for a migration.  Nanoseconds are used because that is the finest
   * resolution get_clock() can deliver; this is purely an internal unit.
   * Anything exposed to the user must be expressed in seconds.
   *
   * Initial value: 30 * 10^6 ns, i.e. 30 ms.
   */
  static uint64_t max_downtime = 30 * 1000 * 1000;

  /* Accessor for the current maximum tolerated downtime, in nanoseconds. */
  uint64_t migrate_max_downtime(void)
  {
      return max_downtime;
  }
  102. MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
  103. {
  104. MigrationCapabilityStatusList *head = NULL;
  105. MigrationCapabilityStatusList *caps;
  106. MigrationState *s = migrate_get_current();
  107. int i;
  108. for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
  109. if (head == NULL) {
  110. head = g_malloc0(sizeof(*caps));
  111. caps = head;
  112. } else {
  113. caps->next = g_malloc0(sizeof(*caps));
  114. caps = caps->next;
  115. }
  116. caps->value =
  117. g_malloc(sizeof(*caps->value));
  118. caps->value->capability = i;
  119. caps->value->state = s->enabled_capabilities[i];
  120. }
  121. return head;
  122. }
  123. static void get_xbzrle_cache_stats(MigrationInfo *info)
  124. {
  125. if (migrate_use_xbzrle()) {
  126. info->has_xbzrle_cache = true;
  127. info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
  128. info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
  129. info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
  130. info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
  131. info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
  132. info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
  133. }
  134. }
  135. MigrationInfo *qmp_query_migrate(Error **errp)
  136. {
  137. MigrationInfo *info = g_malloc0(sizeof(*info));
  138. MigrationState *s = migrate_get_current();
  139. switch (s->state) {
  140. case MIG_STATE_SETUP:
  141. /* no migration has happened ever */
  142. break;
  143. case MIG_STATE_ACTIVE:
  144. info->has_status = true;
  145. info->status = g_strdup("active");
  146. info->has_total_time = true;
  147. info->total_time = qemu_get_clock_ms(rt_clock)
  148. - s->total_time;
  149. info->has_ram = true;
  150. info->ram = g_malloc0(sizeof(*info->ram));
  151. info->ram->transferred = ram_bytes_transferred();
  152. info->ram->remaining = ram_bytes_remaining();
  153. info->ram->total = ram_bytes_total();
  154. info->ram->duplicate = dup_mig_pages_transferred();
  155. info->ram->normal = norm_mig_pages_transferred();
  156. info->ram->normal_bytes = norm_mig_bytes_transferred();
  157. if (blk_mig_active()) {
  158. info->has_disk = true;
  159. info->disk = g_malloc0(sizeof(*info->disk));
  160. info->disk->transferred = blk_mig_bytes_transferred();
  161. info->disk->remaining = blk_mig_bytes_remaining();
  162. info->disk->total = blk_mig_bytes_total();
  163. }
  164. get_xbzrle_cache_stats(info);
  165. break;
  166. case MIG_STATE_COMPLETED:
  167. get_xbzrle_cache_stats(info);
  168. info->has_status = true;
  169. info->status = g_strdup("completed");
  170. info->total_time = s->total_time;
  171. info->has_ram = true;
  172. info->ram = g_malloc0(sizeof(*info->ram));
  173. info->ram->transferred = ram_bytes_transferred();
  174. info->ram->remaining = 0;
  175. info->ram->total = ram_bytes_total();
  176. info->ram->duplicate = dup_mig_pages_transferred();
  177. info->ram->normal = norm_mig_pages_transferred();
  178. info->ram->normal_bytes = norm_mig_bytes_transferred();
  179. break;
  180. case MIG_STATE_ERROR:
  181. info->has_status = true;
  182. info->status = g_strdup("failed");
  183. break;
  184. case MIG_STATE_CANCELLED:
  185. info->has_status = true;
  186. info->status = g_strdup("cancelled");
  187. break;
  188. }
  189. return info;
  190. }
  191. void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
  192. Error **errp)
  193. {
  194. MigrationState *s = migrate_get_current();
  195. MigrationCapabilityStatusList *cap;
  196. if (s->state == MIG_STATE_ACTIVE) {
  197. error_set(errp, QERR_MIGRATION_ACTIVE);
  198. return;
  199. }
  200. for (cap = params; cap; cap = cap->next) {
  201. s->enabled_capabilities[cap->value->capability] = cap->value->state;
  202. }
  203. }
  204. /* shared migration helpers */
  205. static int migrate_fd_cleanup(MigrationState *s)
  206. {
  207. int ret = 0;
  208. if (s->fd != -1) {
  209. qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
  210. }
  211. if (s->file) {
  212. DPRINTF("closing file\n");
  213. ret = qemu_fclose(s->file);
  214. s->file = NULL;
  215. }
  216. if (s->fd != -1) {
  217. close(s->fd);
  218. s->fd = -1;
  219. }
  220. return ret;
  221. }
  222. void migrate_fd_error(MigrationState *s)
  223. {
  224. DPRINTF("setting error state\n");
  225. s->state = MIG_STATE_ERROR;
  226. notifier_list_notify(&migration_state_notifiers, s);
  227. migrate_fd_cleanup(s);
  228. }
  229. static void migrate_fd_completed(MigrationState *s)
  230. {
  231. DPRINTF("setting completed state\n");
  232. if (migrate_fd_cleanup(s) < 0) {
  233. s->state = MIG_STATE_ERROR;
  234. } else {
  235. s->state = MIG_STATE_COMPLETED;
  236. runstate_set(RUN_STATE_POSTMIGRATE);
  237. }
  238. notifier_list_notify(&migration_state_notifiers, s);
  239. }
  240. static void migrate_fd_put_notify(void *opaque)
  241. {
  242. MigrationState *s = opaque;
  243. qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
  244. qemu_file_put_notify(s->file);
  245. if (s->file && qemu_file_get_error(s->file)) {
  246. migrate_fd_error(s);
  247. }
  248. }
  249. static ssize_t migrate_fd_put_buffer(void *opaque, const void *data,
  250. size_t size)
  251. {
  252. MigrationState *s = opaque;
  253. ssize_t ret;
  254. if (s->state != MIG_STATE_ACTIVE) {
  255. return -EIO;
  256. }
  257. do {
  258. ret = s->write(s, data, size);
  259. } while (ret == -1 && ((s->get_error(s)) == EINTR));
  260. if (ret == -1)
  261. ret = -(s->get_error(s));
  262. if (ret == -EAGAIN) {
  263. qemu_set_fd_handler2(s->fd, NULL, NULL, migrate_fd_put_notify, s);
  264. }
  265. return ret;
  266. }
  267. static void migrate_fd_put_ready(void *opaque)
  268. {
  269. MigrationState *s = opaque;
  270. int ret;
  271. if (s->state != MIG_STATE_ACTIVE) {
  272. DPRINTF("put_ready returning because of non-active state\n");
  273. return;
  274. }
  275. DPRINTF("iterate\n");
  276. ret = qemu_savevm_state_iterate(s->file);
  277. if (ret < 0) {
  278. migrate_fd_error(s);
  279. } else if (ret == 1) {
  280. int old_vm_running = runstate_is_running();
  281. DPRINTF("done iterating\n");
  282. qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
  283. vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
  284. if (qemu_savevm_state_complete(s->file) < 0) {
  285. migrate_fd_error(s);
  286. } else {
  287. migrate_fd_completed(s);
  288. }
  289. s->total_time = qemu_get_clock_ms(rt_clock) - s->total_time;
  290. if (s->state != MIG_STATE_COMPLETED) {
  291. if (old_vm_running) {
  292. vm_start();
  293. }
  294. }
  295. }
  296. }
  297. static void migrate_fd_cancel(MigrationState *s)
  298. {
  299. if (s->state != MIG_STATE_ACTIVE)
  300. return;
  301. DPRINTF("cancelling migration\n");
  302. s->state = MIG_STATE_CANCELLED;
  303. notifier_list_notify(&migration_state_notifiers, s);
  304. qemu_savevm_state_cancel(s->file);
  305. migrate_fd_cleanup(s);
  306. }
  307. static void migrate_fd_wait_for_unfreeze(void *opaque)
  308. {
  309. MigrationState *s = opaque;
  310. int ret;
  311. DPRINTF("wait for unfreeze\n");
  312. if (s->state != MIG_STATE_ACTIVE)
  313. return;
  314. do {
  315. fd_set wfds;
  316. FD_ZERO(&wfds);
  317. FD_SET(s->fd, &wfds);
  318. ret = select(s->fd + 1, NULL, &wfds, NULL, NULL);
  319. } while (ret == -1 && (s->get_error(s)) == EINTR);
  320. if (ret == -1) {
  321. qemu_file_set_error(s->file, -s->get_error(s));
  322. }
  323. }
  324. static int migrate_fd_close(void *opaque)
  325. {
  326. MigrationState *s = opaque;
  327. qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
  328. return s->close(s);
  329. }
  330. void add_migration_state_change_notifier(Notifier *notify)
  331. {
  332. notifier_list_add(&migration_state_notifiers, notify);
  333. }
  334. void remove_migration_state_change_notifier(Notifier *notify)
  335. {
  336. notifier_remove(notify);
  337. }
  338. bool migration_is_active(MigrationState *s)
  339. {
  340. return s->state == MIG_STATE_ACTIVE;
  341. }
  342. bool migration_has_finished(MigrationState *s)
  343. {
  344. return s->state == MIG_STATE_COMPLETED;
  345. }
  346. bool migration_has_failed(MigrationState *s)
  347. {
  348. return (s->state == MIG_STATE_CANCELLED ||
  349. s->state == MIG_STATE_ERROR);
  350. }
  351. void migrate_fd_connect(MigrationState *s)
  352. {
  353. int ret;
  354. s->state = MIG_STATE_ACTIVE;
  355. s->file = qemu_fopen_ops_buffered(s,
  356. s->bandwidth_limit,
  357. migrate_fd_put_buffer,
  358. migrate_fd_put_ready,
  359. migrate_fd_wait_for_unfreeze,
  360. migrate_fd_close);
  361. DPRINTF("beginning savevm\n");
  362. ret = qemu_savevm_state_begin(s->file, &s->params);
  363. if (ret < 0) {
  364. DPRINTF("failed, %d\n", ret);
  365. migrate_fd_error(s);
  366. return;
  367. }
  368. migrate_fd_put_ready(s);
  369. }
  370. static MigrationState *migrate_init(const MigrationParams *params)
  371. {
  372. MigrationState *s = migrate_get_current();
  373. int64_t bandwidth_limit = s->bandwidth_limit;
  374. bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
  375. int64_t xbzrle_cache_size = s->xbzrle_cache_size;
  376. memcpy(enabled_capabilities, s->enabled_capabilities,
  377. sizeof(enabled_capabilities));
  378. memset(s, 0, sizeof(*s));
  379. s->bandwidth_limit = bandwidth_limit;
  380. s->params = *params;
  381. memcpy(s->enabled_capabilities, enabled_capabilities,
  382. sizeof(enabled_capabilities));
  383. s->xbzrle_cache_size = xbzrle_cache_size;
  384. s->bandwidth_limit = bandwidth_limit;
  385. s->state = MIG_STATE_SETUP;
  386. s->total_time = qemu_get_clock_ms(rt_clock);
  387. return s;
  388. }
  389. static GSList *migration_blockers;
  390. void migrate_add_blocker(Error *reason)
  391. {
  392. migration_blockers = g_slist_prepend(migration_blockers, reason);
  393. }
  394. void migrate_del_blocker(Error *reason)
  395. {
  396. migration_blockers = g_slist_remove(migration_blockers, reason);
  397. }
  398. void qmp_migrate(const char *uri, bool has_blk, bool blk,
  399. bool has_inc, bool inc, bool has_detach, bool detach,
  400. Error **errp)
  401. {
  402. MigrationState *s = migrate_get_current();
  403. MigrationParams params;
  404. const char *p;
  405. int ret;
  406. params.blk = blk;
  407. params.shared = inc;
  408. if (s->state == MIG_STATE_ACTIVE) {
  409. error_set(errp, QERR_MIGRATION_ACTIVE);
  410. return;
  411. }
  412. if (qemu_savevm_state_blocked(errp)) {
  413. return;
  414. }
  415. if (migration_blockers) {
  416. *errp = error_copy(migration_blockers->data);
  417. return;
  418. }
  419. s = migrate_init(&params);
  420. if (strstart(uri, "tcp:", &p)) {
  421. ret = tcp_start_outgoing_migration(s, p, errp);
  422. #if !defined(WIN32)
  423. } else if (strstart(uri, "exec:", &p)) {
  424. ret = exec_start_outgoing_migration(s, p);
  425. } else if (strstart(uri, "unix:", &p)) {
  426. ret = unix_start_outgoing_migration(s, p);
  427. } else if (strstart(uri, "fd:", &p)) {
  428. ret = fd_start_outgoing_migration(s, p);
  429. #endif
  430. } else {
  431. error_set(errp, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol");
  432. return;
  433. }
  434. if (ret < 0) {
  435. if (!error_is_set(errp)) {
  436. DPRINTF("migration failed: %s\n", strerror(-ret));
  437. /* FIXME: we should return meaningful errors */
  438. error_set(errp, QERR_UNDEFINED_ERROR);
  439. }
  440. return;
  441. }
  442. notifier_list_notify(&migration_state_notifiers, s);
  443. }
  444. void qmp_migrate_cancel(Error **errp)
  445. {
  446. migrate_fd_cancel(migrate_get_current());
  447. }
  448. void qmp_migrate_set_cache_size(int64_t value, Error **errp)
  449. {
  450. MigrationState *s = migrate_get_current();
  451. /* Check for truncation */
  452. if (value != (size_t)value) {
  453. error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
  454. "exceeding address space");
  455. return;
  456. }
  457. s->xbzrle_cache_size = xbzrle_cache_resize(value);
  458. }
  459. int64_t qmp_query_migrate_cache_size(Error **errp)
  460. {
  461. return migrate_xbzrle_cache_size();
  462. }
  463. void qmp_migrate_set_speed(int64_t value, Error **errp)
  464. {
  465. MigrationState *s;
  466. if (value < 0) {
  467. value = 0;
  468. }
  469. s = migrate_get_current();
  470. s->bandwidth_limit = value;
  471. qemu_file_set_rate_limit(s->file, s->bandwidth_limit);
  472. }
  473. void qmp_migrate_set_downtime(double value, Error **errp)
  474. {
  475. value *= 1e9;
  476. value = MAX(0, MIN(UINT64_MAX, value));
  477. max_downtime = (uint64_t)value;
  478. }
  479. int migrate_use_xbzrle(void)
  480. {
  481. MigrationState *s;
  482. s = migrate_get_current();
  483. return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
  484. }
  485. int64_t migrate_xbzrle_cache_size(void)
  486. {
  487. MigrationState *s;
  488. s = migrate_get_current();
  489. return s->xbzrle_cache_size;
  490. }