/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block_int.h"
#include "hw/hw.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "migration/block.h"
#include "migration/migration.h"
#include "sysemu/blockdev.h"
#include <assert.h>

#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

#define MAX_IS_ALLOCATED_SEARCH 65536

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    BlockDriverState *bs;
    int bulk_completed;
    int shared_base;
    int64_t cur_sector;
    int64_t cur_dirty;
    int64_t completed_sectors;
    int64_t total_sectors;
    int64_t dirty;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
    unsigned long *aio_bitmap;
} BlkMigDevState;

typedef struct BlkMigBlock {
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *aiocb;
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;
    int transferred;
    int64_t total_sector_sum;
    int prev_progress;
    int bulk_completed;
    long double prev_time_offset;
} BlkMigState;

static BlkMigState block_mig_state;
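
/* Emit one block on the migration stream: a be64 holding the sector number
 * shifted by BDRV_SECTOR_BITS and OR'ed with BLK_MIG_FLAG_DEVICE_BLOCK, the
 * device name as a length-prefixed string, and one BLOCK_SIZE chunk of data. */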
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
    int len;

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}
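
/* The aio_bitmap keeps one bit per BDRV_SECTORS_PER_DIRTY_CHUNK-sized chunk
 * and marks chunks that currently have an asynchronous read in flight, so
 * mig_save_device_dirty() can drain outstanding requests before re-reading
 * such a chunk. */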
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                                  int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}

static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
}
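
/* Completion callback for the bulk and dirty-phase AIO reads: record the
 * result, queue the block on blk_list so flush_blks() can send it, clear the
 * in-flight bit, and update the submitted/read_done counters. */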
static void blk_mig_read_cb(void *opaque, int ret)
{
    long double curr_time = qemu_get_clock_ns(rt_clock);
    BlkMigBlock *blk = opaque;

    blk->ret = ret;

    block_mig_state.prev_time_offset = curr_time;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
}
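
/* Submit an asynchronous read for the next chunk of the bulk phase on one
 * device.  With a shared base image, unallocated sectors are skipped first.
 * Returns 1 once the device has been read up to total_sectors, 0 otherwise. */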
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_malloc(sizeof(BlkMigBlock));
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    if (block_mig_state.submitted == 0) {
        block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
    }

    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);
    block_mig_state.submitted++;

    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    bmds->cur_sector = cur_sector + nr_sectors;

    return (bmds->cur_sector >= total_sectors);
}
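
/* Enable or disable dirty tracking on every migrated device; when enabled,
 * the dirty bitmap granularity is BLOCK_SIZE, so one dirty bit corresponds
 * to one migration chunk. */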
static void set_dirty_tracking(int enable)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
    }
}

static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    BlkMigDevState *bmds;
    int64_t sectors;

    if (!bdrv_is_read_only(bs)) {
        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        if (sectors <= 0) {
            return;
        }

        bmds = g_malloc0(sizeof(BlkMigDevState));
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        drive_get_ref(drive_get_by_blockdev(bs));
        bdrv_set_in_use(bs, 1);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bs->device_name);
        } else {
            DPRINTF("Start full migration for %s\n", bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

static void init_blk_migration(QEMUFile *f)
{
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;

    bdrv_iterate(init_blk_migration_it, NULL);
}

static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}
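
/* Scan one device from its cur_dirty cursor for the next dirty chunk and
 * transfer it: asynchronously (is_async) via bdrv_aio_readv(), or
 * synchronously via bdrv_read() followed by blk_send().  Returns 1 when the
 * cursor has reached the end of the device, 0 if a chunk was handled, or a
 * negative errno on a synchronous read failure. */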
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        if (bmds_aio_inflight(bmds, sector)) {
            bdrv_drain_all();
        }
        if (bdrv_get_dirty(bmds->bs, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = g_malloc(sizeof(BlkMigBlock));
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                if (block_mig_state.submitted == 0) {
                    block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
                }

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb, blk);
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
            } else {
                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* return value:
 * 0: too much data for max_downtime
 * 1: little enough data for max_downtime
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        ret = mig_save_device_dirty(f, bmds, is_async);
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}
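
/* Send every completed read queued on blk_list to the migration stream,
 * stopping early if the rate limit is hit or a read reported an error. */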
static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }
        blk_send(f, blk);

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}

static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->bs);
    }

    return dirty << BDRV_SECTOR_BITS;
}
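
/* Tear down migration state: drain outstanding AIO, stop dirty tracking,
 * release every device, and free any blocks still queued for sending. */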
static void blk_mig_cleanup(void)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    bdrv_drain_all();

    set_dirty_tracking(0);

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_set_in_use(bmds->bs, 0);
        drive_put_ref(drive_get_by_blockdev(bmds->bs));
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
}

static void block_migration_cancel(void *opaque)
{
    blk_mig_cleanup();
}

static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    init_blk_migration(f);

    /* start tracking dirty blocks */
    set_dirty_tracking(1);

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return 0;
}
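
/* One iteration of the live phase: flush completed reads, then submit more
 * work while staying under the stream's rate limit (bulk phase first, dirty
 * chunks afterwards), flush again and terminate the section with EOS. */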
static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int64_t last_ftell = qemu_ftell(f);

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f)) {
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
        } else {
            ret = blk_mig_save_dirty_block(f, 1);
            if (ret != 0) {
                /* no more dirty blocks */
                break;
            }
        }
    }
    if (ret < 0) {
        blk_mig_cleanup();
        return ret;
    }

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return qemu_ftell(f) - last_ftell;
}

static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that the bulk save is complete and
       all async reads have completed */
    assert(block_mig_state.submitted == 0);

    do {
        ret = blk_mig_save_dirty_block(f, 0);
    } while (ret == 0);

    blk_mig_cleanup();
    if (ret < 0) {
        return ret;
    }
    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return 0;
}

static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending = get_remaining_dirty() +
                       block_mig_state.submitted * BLOCK_SIZE +
                       block_mig_state.read_done * BLOCK_SIZE;

    /* Report at least one block pending during bulk phase */
    if (pending == 0 && !block_mig_state.bulk_completed) {
        pending = BLOCK_SIZE;
    }

    DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
    return pending;
}
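
/* Destination side: parse the stream produced above.  Each record starts
 * with a be64 whose low bits carry the flags; BLK_MIG_FLAG_DEVICE_BLOCK
 * records are followed by a device name and a BLOCK_SIZE buffer that is
 * written at the encoded sector offset, BLK_MIG_FLAG_PROGRESS records update
 * the console progress line, and BLK_MIG_FLAG_EOS ends the section. */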
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = g_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            g_free(buf);
            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
            return -EINVAL;
        }

        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static void block_set_params(const MigrationParams *params, void *opaque)
{
    block_mig_state.blk_enable = params->blk;
    block_mig_state.shared_base = params->shared;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= params->shared;
}

static bool block_is_active(void *opaque)
{
    return block_mig_state.blk_enable == 1;
}

SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .cancel = block_migration_cancel,
    .is_active = block_is_active,
};

void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}