block-dirty-bitmap.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934
  1. /*
  2. * Block dirty bitmap postcopy migration
  3. *
  4. * Copyright IBM, Corp. 2009
  5. * Copyright (c) 2016-2017 Virtuozzo International GmbH. All rights reserved.
  6. *
  7. * Authors:
  8. * Liran Schour <lirans@il.ibm.com>
  9. * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
  10. *
  11. * This work is licensed under the terms of the GNU GPL, version 2. See
  12. * the COPYING file in the top-level directory.
 * This file is derived from migration/block.c, so its author and IBM copyright
  14. * are here, although content is quite different.
  15. *
  16. * Contributions after 2012-01-13 are licensed under the terms of the
  17. * GNU GPL, version 2 or (at your option) any later version.
  18. *
  19. * ***
  20. *
  21. * Here postcopy migration of dirty bitmaps is realized. Only QMP-addressable
  22. * bitmaps are migrated.
  23. *
  24. * Bitmap migration implies creating bitmap with the same name and granularity
  25. * in destination QEMU. If the bitmap with the same name (for the same node)
  26. * already exists on destination an error will be generated.
  27. *
  28. * format of migration:
  29. *
  30. * # Header (shared for different chunk types)
 * 1, 2 or 4 bytes: flags (see qemu_{get,put}_bitmap_flags)
  32. * [ 1 byte: node name size ] \ flags & DEVICE_NAME
  33. * [ n bytes: node name ] /
  34. * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME
  35. * [ n bytes: bitmap name ] /
  36. *
  37. * # Start of bitmap migration (flags & START)
  38. * header
 * be32: granularity
  40. * 1 byte: bitmap flags (corresponds to BdrvDirtyBitmap)
  41. * bit 0 - bitmap is enabled
  42. * bit 1 - bitmap is persistent
  43. * bit 2 - bitmap is autoloading
  44. * bits 3-7 - reserved, must be zero
  45. *
  46. * # Complete of bitmap migration (flags & COMPLETE)
  47. * header
  48. *
  49. * # Data chunk of bitmap migration
  50. * header
  51. * be64: start sector
  52. * be32: number of sectors
  53. * [ be64: buffer size ] \ ! (flags & ZEROES)
  54. * [ n bytes: buffer ] /
  55. *
  56. * The last chunk in stream should contain flags & EOS. The chunk may skip
  57. * device and/or bitmap names, assuming them to be the same with the previous
  58. * chunk.
  59. */
  60. #include "qemu/osdep.h"
  61. #include "block/block.h"
  62. #include "block/block_int.h"
  63. #include "sysemu/block-backend.h"
  64. #include "sysemu/runstate.h"
  65. #include "qemu/main-loop.h"
  66. #include "qemu/error-report.h"
  67. #include "migration/misc.h"
  68. #include "migration/migration.h"
  69. #include "qemu-file.h"
  70. #include "migration/vmstate.h"
  71. #include "migration/register.h"
  72. #include "qemu/hbitmap.h"
  73. #include "qemu/cutils.h"
  74. #include "qapi/error.h"
  75. #include "trace.h"
/* Nominal maximum payload of one BITS chunk, in bytes (the load side
 * rejects buffers larger than 10 * CHUNK_SIZE, see dirty_bitmap_load_bits) */
#define CHUNK_SIZE (1 << 10)

/* Flags occupy one, two or four bytes (Big Endian). The size is determined as
 * follows:
 * in first (most significant) byte bit 8 is clear --> one byte
 * in first byte bit 8 is set --> two or four bytes, depending on second
 * byte:
 * | in second byte bit 8 is clear --> two bytes
 * | in second byte bit 8 is set --> four bytes
 */
/* Per-chunk header flags as they appear on the wire */
#define DIRTY_BITMAP_MIG_FLAG_EOS 0x01
#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04
#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08
#define DIRTY_BITMAP_MIG_FLAG_START 0x10
#define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20
#define DIRTY_BITMAP_MIG_FLAG_BITS 0x40

/* Marker bit of the multi-byte flag encodings described above */
#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80

/* Bitmap-state flags carried in the one-byte field of a START chunk */
#define DIRTY_BITMAP_MIG_START_FLAG_ENABLED 0x01
#define DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT 0x02
/* 0x04 was "AUTOLOAD" flags on older versions, now it is ignored */
#define DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK 0xf8
/* State of one bitmap during save process */
typedef struct SaveBitmapState {
    /* Written during setup phase. */
    BlockDriverState *bs;
    const char *node_name;
    BdrvDirtyBitmap *bitmap;
    uint64_t total_sectors;
    /* How many sectors one BITS chunk covers (derived from CHUNK_SIZE) */
    uint64_t sectors_per_chunk;
    QSIMPLEQ_ENTRY(SaveBitmapState) entry;
    /* DIRTY_BITMAP_MIG_START_FLAG_* bits to send in the START chunk */
    uint8_t flags;

    /* For bulk phase. */
    bool bulk_completed;
    uint64_t cur_sector;
} SaveBitmapState;

/* State of the dirty bitmap migration (DBM) during save process */
typedef struct DBMSaveState {
    QSIMPLEQ_HEAD(, SaveBitmapState) dbms_list;

    bool bulk_completed;
    /* Set when setup found nothing to migrate; makes is_active() false */
    bool no_bitmaps;

    /* for send_bitmap_bits() */
    BlockDriverState *prev_bs;
    BdrvDirtyBitmap *prev_bitmap;
} DBMSaveState;

/* Per-bitmap state tracked on the destination until the VM starts */
typedef struct LoadBitmapState {
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;
    bool migrated; /* COMPLETE chunk was processed for this bitmap */
    bool enabled;  /* bitmap was enabled on the source */
} LoadBitmapState;

/* State of the dirty bitmap migration (DBM) during load process */
typedef struct DBMLoadState {
    uint32_t flags; /* flags of the chunk currently being processed */
    char node_name[256];
    char bitmap_name[256];
    /* Node/bitmap the current chunk applies to; sticky across chunks that
     * omit the names (see the format description at the top of the file) */
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;

    bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */

    /*
     * cancelled
     * Incoming migration is cancelled for some reason. That means that we
     * still should read our chunks from migration stream, to not affect other
     * migration objects (like RAM), but just ignore them and do not touch any
     * bitmaps or nodes.
     */
    bool cancelled;

    GSList *bitmaps;
    QemuMutex lock; /* protect bitmaps */
} DBMLoadState;

/* Combined save/load state; a single global instance (dbm_state) */
typedef struct DBMState {
    DBMSaveState save;
    DBMLoadState load;
} DBMState;

static DBMState dbm_state;
  150. static uint32_t qemu_get_bitmap_flags(QEMUFile *f)
  151. {
  152. uint8_t flags = qemu_get_byte(f);
  153. if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
  154. flags = flags << 8 | qemu_get_byte(f);
  155. if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) {
  156. flags = flags << 16 | qemu_get_be16(f);
  157. }
  158. }
  159. return flags;
  160. }
  161. static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags)
  162. {
  163. /* The code currently does not send flags as more than one byte */
  164. assert(!(flags & (0xffffff00 | DIRTY_BITMAP_MIG_EXTRA_FLAGS)));
  165. qemu_put_byte(f, flags);
  166. }
  167. static void send_bitmap_header(QEMUFile *f, DBMSaveState *s,
  168. SaveBitmapState *dbms, uint32_t additional_flags)
  169. {
  170. BlockDriverState *bs = dbms->bs;
  171. BdrvDirtyBitmap *bitmap = dbms->bitmap;
  172. uint32_t flags = additional_flags;
  173. trace_send_bitmap_header_enter();
  174. if (bs != s->prev_bs) {
  175. s->prev_bs = bs;
  176. flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
  177. }
  178. if (bitmap != s->prev_bitmap) {
  179. s->prev_bitmap = bitmap;
  180. flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
  181. }
  182. qemu_put_bitmap_flags(f, flags);
  183. if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
  184. qemu_put_counted_string(f, dbms->node_name);
  185. }
  186. if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
  187. qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
  188. }
  189. }
  190. static void send_bitmap_start(QEMUFile *f, DBMSaveState *s,
  191. SaveBitmapState *dbms)
  192. {
  193. send_bitmap_header(f, s, dbms, DIRTY_BITMAP_MIG_FLAG_START);
  194. qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
  195. qemu_put_byte(f, dbms->flags);
  196. }
  197. static void send_bitmap_complete(QEMUFile *f, DBMSaveState *s,
  198. SaveBitmapState *dbms)
  199. {
  200. send_bitmap_header(f, s, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
  201. }
  202. static void send_bitmap_bits(QEMUFile *f, DBMSaveState *s,
  203. SaveBitmapState *dbms,
  204. uint64_t start_sector, uint32_t nr_sectors)
  205. {
  206. /* align for buffer_is_zero() */
  207. uint64_t align = 4 * sizeof(long);
  208. uint64_t unaligned_size =
  209. bdrv_dirty_bitmap_serialization_size(
  210. dbms->bitmap, start_sector << BDRV_SECTOR_BITS,
  211. (uint64_t)nr_sectors << BDRV_SECTOR_BITS);
  212. uint64_t buf_size = QEMU_ALIGN_UP(unaligned_size, align);
  213. uint8_t *buf = g_malloc0(buf_size);
  214. uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
  215. bdrv_dirty_bitmap_serialize_part(
  216. dbms->bitmap, buf, start_sector << BDRV_SECTOR_BITS,
  217. (uint64_t)nr_sectors << BDRV_SECTOR_BITS);
  218. if (buffer_is_zero(buf, buf_size)) {
  219. g_free(buf);
  220. buf = NULL;
  221. flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
  222. }
  223. trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
  224. send_bitmap_header(f, s, dbms, flags);
  225. qemu_put_be64(f, start_sector);
  226. qemu_put_be32(f, nr_sectors);
  227. /* if a block is zero we need to flush here since the network
  228. * bandwidth is now a lot higher than the storage device bandwidth.
  229. * thus if we queue zero blocks we slow down the migration. */
  230. if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
  231. qemu_fflush(f);
  232. } else {
  233. qemu_put_be64(f, buf_size);
  234. qemu_put_buffer(f, buf, buf_size);
  235. }
  236. g_free(buf);
  237. }
  238. /* Called with iothread lock taken. */
  239. static void dirty_bitmap_do_save_cleanup(DBMSaveState *s)
  240. {
  241. SaveBitmapState *dbms;
  242. while ((dbms = QSIMPLEQ_FIRST(&s->dbms_list)) != NULL) {
  243. QSIMPLEQ_REMOVE_HEAD(&s->dbms_list, entry);
  244. bdrv_dirty_bitmap_set_busy(dbms->bitmap, false);
  245. bdrv_unref(dbms->bs);
  246. g_free(dbms);
  247. }
  248. }
/* Called with iothread lock taken. */
/*
 * Queue every named bitmap of @bs for migration under node name @bs_name.
 * Returns 0 on success (including "no named bitmaps here"), -1 on error
 * (already reported via error_report).
 */
static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
                               const char *bs_name)
{
    BdrvDirtyBitmap *bitmap;
    SaveBitmapState *dbms;
    Error *local_err = NULL;

    /* Find the first named bitmap, if any; NULL means nothing to migrate */
    FOR_EACH_DIRTY_BITMAP(bs, bitmap) {
        if (bdrv_dirty_bitmap_name(bitmap)) {
            break;
        }
    }
    if (!bitmap) {
        return 0;
    }

    /*
     * Bitmaps are addressed by node name on the destination, so the node
     * must have a stable, user-visible name.
     */
    if (!bs_name || strcmp(bs_name, "") == 0) {
        error_report("Bitmap '%s' in unnamed node can't be migrated",
                     bdrv_dirty_bitmap_name(bitmap));
        return -1;
    }

    /* Auto-generated names (leading '#') are not stable across hosts */
    if (bs_name[0] == '#') {
        error_report("Bitmap '%s' in a node with auto-generated "
                     "name '%s' can't be migrated",
                     bdrv_dirty_bitmap_name(bitmap), bs_name);
        return -1;
    }

    FOR_EACH_DIRTY_BITMAP(bs, bitmap) {
        if (!bdrv_dirty_bitmap_name(bitmap)) {
            continue;
        }

        if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
            error_report_err(local_err);
            return -1;
        }

        /* Pin the node and mark the bitmap busy for the whole migration;
         * both are undone in dirty_bitmap_do_save_cleanup() */
        bdrv_ref(bs);
        bdrv_dirty_bitmap_set_busy(bitmap, true);

        dbms = g_new0(SaveBitmapState, 1);
        dbms->bs = bs;
        dbms->node_name = bs_name;
        dbms->bitmap = bitmap;
        dbms->total_sectors = bdrv_nb_sectors(bs);
        /* One CHUNK_SIZE-byte buffer holds CHUNK_SIZE * 8 bitmap bits,
         * each covering one granularity-sized region */
        dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
            bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
        if (bdrv_dirty_bitmap_enabled(bitmap)) {
            dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
        }
        if (bdrv_dirty_bitmap_get_persistence(bitmap)) {
            dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT;
        }

        QSIMPLEQ_INSERT_TAIL(&s->dbms_list, dbms, entry);
    }

    return 0;
}
/* Called with iothread lock taken. */
/*
 * Build the list of bitmaps to migrate.  Nodes reachable through a named
 * block backend are addressed by the backend name; everything else by its
 * node name.  Returns 0 on success, -1 on error (list already cleaned up).
 */
static int init_dirty_bitmap_migration(DBMSaveState *s)
{
    BlockDriverState *bs;
    SaveBitmapState *dbms;
    GHashTable *handled_by_blk = g_hash_table_new(NULL, NULL);
    BlockBackend *blk;

    s->bulk_completed = false;
    s->prev_bs = NULL;
    s->prev_bitmap = NULL;
    s->no_bitmaps = false;

    /*
     * Use blockdevice name for direct (or filtered) children of named block
     * backends.
     */
    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
        const char *name = blk_name(blk);

        if (!name || strcmp(name, "") == 0) {
            continue;
        }

        bs = blk_bs(blk);

        /* Skip filters without bitmaps */
        while (bs && bs->drv && bs->drv->is_filter &&
               !bdrv_has_named_bitmaps(bs))
        {
            /* Descend to the filtered child (backing preferred over file) */
            if (bs->backing) {
                bs = bs->backing->bs;
            } else if (bs->file) {
                bs = bs->file->bs;
            } else {
                bs = NULL;
            }
        }

        if (bs && bs->drv && !bs->drv->is_filter) {
            if (add_bitmaps_to_list(s, bs, name)) {
                goto fail;
            }
            /* Remember the node so the loop below does not re-add it */
            g_hash_table_add(handled_by_blk, bs);
        }
    }

    /* Remaining nodes are addressed by their own node name */
    for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) {
        if (g_hash_table_contains(handled_by_blk, bs)) {
            continue;
        }

        if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs))) {
            goto fail;
        }
    }

    /* unset migration flags here, to not roll back it */
    QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
        bdrv_dirty_bitmap_skip_store(dbms->bitmap, true);
    }

    if (QSIMPLEQ_EMPTY(&s->dbms_list)) {
        s->no_bitmaps = true;
    }

    g_hash_table_destroy(handled_by_blk);

    return 0;

fail:
    g_hash_table_destroy(handled_by_blk);
    dirty_bitmap_do_save_cleanup(s);

    return -1;
}
  364. /* Called with no lock taken. */
  365. static void bulk_phase_send_chunk(QEMUFile *f, DBMSaveState *s,
  366. SaveBitmapState *dbms)
  367. {
  368. uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
  369. dbms->sectors_per_chunk);
  370. send_bitmap_bits(f, s, dbms, dbms->cur_sector, nr_sectors);
  371. dbms->cur_sector += nr_sectors;
  372. if (dbms->cur_sector >= dbms->total_sectors) {
  373. dbms->bulk_completed = true;
  374. }
  375. }
  376. /* Called with no lock taken. */
  377. static void bulk_phase(QEMUFile *f, DBMSaveState *s, bool limit)
  378. {
  379. SaveBitmapState *dbms;
  380. QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
  381. while (!dbms->bulk_completed) {
  382. bulk_phase_send_chunk(f, s, dbms);
  383. if (limit && qemu_file_rate_limit(f)) {
  384. return;
  385. }
  386. }
  387. }
  388. s->bulk_completed = true;
  389. }
  390. /* for SaveVMHandlers */
  391. static void dirty_bitmap_save_cleanup(void *opaque)
  392. {
  393. DBMSaveState *s = &((DBMState *)opaque)->save;
  394. dirty_bitmap_do_save_cleanup(s);
  395. }
  396. static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
  397. {
  398. DBMSaveState *s = &((DBMState *)opaque)->save;
  399. trace_dirty_bitmap_save_iterate(migration_in_postcopy());
  400. if (migration_in_postcopy() && !s->bulk_completed) {
  401. bulk_phase(f, s, true);
  402. }
  403. qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
  404. return s->bulk_completed;
  405. }
  406. /* Called with iothread lock taken. */
  407. static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
  408. {
  409. DBMSaveState *s = &((DBMState *)opaque)->save;
  410. SaveBitmapState *dbms;
  411. trace_dirty_bitmap_save_complete_enter();
  412. if (!s->bulk_completed) {
  413. bulk_phase(f, s, false);
  414. }
  415. QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
  416. send_bitmap_complete(f, s, dbms);
  417. }
  418. qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
  419. trace_dirty_bitmap_save_complete_finish();
  420. dirty_bitmap_save_cleanup(opaque);
  421. return 0;
  422. }
  423. static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
  424. uint64_t max_size,
  425. uint64_t *res_precopy_only,
  426. uint64_t *res_compatible,
  427. uint64_t *res_postcopy_only)
  428. {
  429. DBMSaveState *s = &((DBMState *)opaque)->save;
  430. SaveBitmapState *dbms;
  431. uint64_t pending = 0;
  432. qemu_mutex_lock_iothread();
  433. QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
  434. uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
  435. uint64_t sectors = dbms->bulk_completed ? 0 :
  436. dbms->total_sectors - dbms->cur_sector;
  437. pending += DIV_ROUND_UP(sectors * BDRV_SECTOR_SIZE, gran);
  438. }
  439. qemu_mutex_unlock_iothread();
  440. trace_dirty_bitmap_save_pending(pending, max_size);
  441. *res_postcopy_only += pending;
  442. }
  443. /* First occurrence of this bitmap. It should be created if doesn't exist */
  444. static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s)
  445. {
  446. Error *local_err = NULL;
  447. uint32_t granularity = qemu_get_be32(f);
  448. uint8_t flags = qemu_get_byte(f);
  449. LoadBitmapState *b;
  450. if (s->cancelled) {
  451. return 0;
  452. }
  453. if (s->bitmap) {
  454. error_report("Bitmap with the same name ('%s') already exists on "
  455. "destination", bdrv_dirty_bitmap_name(s->bitmap));
  456. return -EINVAL;
  457. } else {
  458. s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
  459. s->bitmap_name, &local_err);
  460. if (!s->bitmap) {
  461. error_report_err(local_err);
  462. return -EINVAL;
  463. }
  464. }
  465. if (flags & DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK) {
  466. error_report("Unknown flags in migrated dirty bitmap header: %x",
  467. flags);
  468. return -EINVAL;
  469. }
  470. if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) {
  471. bdrv_dirty_bitmap_set_persistence(s->bitmap, true);
  472. }
  473. bdrv_disable_dirty_bitmap(s->bitmap);
  474. if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) {
  475. bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err);
  476. if (local_err) {
  477. error_report_err(local_err);
  478. return -EINVAL;
  479. }
  480. }
  481. b = g_new(LoadBitmapState, 1);
  482. b->bs = s->bs;
  483. b->bitmap = s->bitmap;
  484. b->migrated = false;
  485. b->enabled = flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
  486. s->bitmaps = g_slist_prepend(s->bitmaps, b);
  487. return 0;
  488. }
  489. /*
  490. * before_vm_start_handle_item
  491. *
  492. * g_slist_foreach helper
  493. *
  494. * item is LoadBitmapState*
  495. * opaque is DBMLoadState*
  496. */
  497. static void before_vm_start_handle_item(void *item, void *opaque)
  498. {
  499. DBMLoadState *s = opaque;
  500. LoadBitmapState *b = item;
  501. if (b->enabled) {
  502. if (b->migrated) {
  503. bdrv_enable_dirty_bitmap(b->bitmap);
  504. } else {
  505. bdrv_dirty_bitmap_enable_successor(b->bitmap);
  506. }
  507. }
  508. if (b->migrated) {
  509. s->bitmaps = g_slist_remove(s->bitmaps, b);
  510. g_free(b);
  511. }
  512. }
  513. void dirty_bitmap_mig_before_vm_start(void)
  514. {
  515. DBMLoadState *s = &dbm_state.load;
  516. qemu_mutex_lock(&s->lock);
  517. assert(!s->before_vm_start_handled);
  518. g_slist_foreach(s->bitmaps, before_vm_start_handle_item, s);
  519. s->before_vm_start_handled = true;
  520. qemu_mutex_unlock(&s->lock);
  521. }
/*
 * Cancel incoming bitmap migration: drop all unfinished bitmaps and switch
 * to "consume but ignore further chunks" mode.  Called with s->lock taken.
 */
static void cancel_incoming_locked(DBMLoadState *s)
{
    GSList *item;

    if (s->cancelled) {
        return;
    }

    s->cancelled = true;
    s->bs = NULL;
    s->bitmap = NULL;

    /* Drop all unfinished bitmaps */
    for (item = s->bitmaps; item; item = g_slist_next(item)) {
        LoadBitmapState *b = item->data;

        /*
         * Bitmap must be unfinished, as finished bitmaps should already be
         * removed from the list.
         */
        assert(!s->before_vm_start_handled || !b->migrated);
        /* Merge the successor back before releasing, if one was created */
        if (bdrv_dirty_bitmap_has_successor(b->bitmap)) {
            bdrv_reclaim_dirty_bitmap(b->bitmap, &error_abort);
        }
        bdrv_release_dirty_bitmap(b->bitmap);
    }

    g_slist_free_full(s->bitmaps, g_free);
    s->bitmaps = NULL;
}
  547. void dirty_bitmap_mig_cancel_outgoing(void)
  548. {
  549. dirty_bitmap_do_save_cleanup(&dbm_state.save);
  550. }
  551. void dirty_bitmap_mig_cancel_incoming(void)
  552. {
  553. DBMLoadState *s = &dbm_state.load;
  554. qemu_mutex_lock(&s->lock);
  555. cancel_incoming_locked(s);
  556. qemu_mutex_unlock(&s->lock);
  557. }
/*
 * Process a COMPLETE chunk: finish deserialization, merge the successor
 * back (if any) and mark the bitmap migrated.  Called with s->lock taken.
 */
static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s)
{
    GSList *item;
    trace_dirty_bitmap_load_complete();

    if (s->cancelled) {
        return;
    }

    bdrv_dirty_bitmap_deserialize_finish(s->bitmap);

    if (bdrv_dirty_bitmap_has_successor(s->bitmap)) {
        bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort);
    }

    for (item = s->bitmaps; item; item = g_slist_next(item)) {
        LoadBitmapState *b = item->data;

        if (b->bitmap == s->bitmap) {
            b->migrated = true;
            /*
             * If the before-VM-start hook already ran it will not run
             * again, so drop the finished bitmap from the list here.
             */
            if (s->before_vm_start_handled) {
                s->bitmaps = g_slist_remove(s->bitmaps, b);
                g_free(b);
            }
            break;
        }
    }
}
/*
 * Process a BITS chunk.  The payload is always consumed from the stream,
 * even in cancelled mode, so later migration sections stay readable.
 * Returns 0 on success or soft failure (cancel), -EIO on stream errors.
 */
static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s)
{
    uint64_t first_byte = qemu_get_be64(f) << BDRV_SECTOR_BITS;
    uint64_t nr_bytes = (uint64_t)qemu_get_be32(f) << BDRV_SECTOR_BITS;
    trace_dirty_bitmap_load_bits_enter(first_byte >> BDRV_SECTOR_BITS,
                                       nr_bytes >> BDRV_SECTOR_BITS);

    if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
        /* ZEROES chunk has no payload; just clear the range */
        trace_dirty_bitmap_load_bits_zeroes();
        if (!s->cancelled) {
            bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte,
                                                 nr_bytes, false);
        }
    } else {
        size_t ret;
        g_autofree uint8_t *buf = NULL;
        uint64_t buf_size = qemu_get_be64(f);
        uint64_t needed_size;

        /*
         * The actual check for buf_size is done a bit later. We can't do it in
         * cancelled mode as we don't have the bitmap to check the constraints
         * (so, we allocate a buffer and read prior to the check). On the other
         * hand, we shouldn't blindly g_malloc the number from the stream.
         * Actually one chunk should not be larger than CHUNK_SIZE. Let's allow
         * a bit larger (which means that bitmap migration will fail anyway and
         * the whole migration will most probably fail soon due to broken
         * stream).
         */
        if (buf_size > 10 * CHUNK_SIZE) {
            error_report("Bitmap migration stream buffer allocation request "
                         "is too large");
            return -EIO;
        }

        buf = g_malloc(buf_size);
        ret = qemu_get_buffer(f, buf, buf_size);
        if (ret != buf_size) {
            error_report("Failed to read bitmap bits");
            return -EIO;
        }

        if (s->cancelled) {
            return 0;
        }

        needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap,
                                                           first_byte,
                                                           nr_bytes);

        if (needed_size > buf_size ||
            buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long))
            /* Here used same alignment as in send_bitmap_bits */
        ) {
            error_report("Migrated bitmap granularity doesn't "
                         "match the destination bitmap '%s' granularity",
                         bdrv_dirty_bitmap_name(s->bitmap));
            /* Soft failure: cancel bitmap migration but keep the stream OK */
            cancel_incoming_locked(s);
            return 0;
        }

        bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes,
                                           false);
    }

    return 0;
}
  640. static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s)
  641. {
  642. Error *local_err = NULL;
  643. bool nothing;
  644. s->flags = qemu_get_bitmap_flags(f);
  645. trace_dirty_bitmap_load_header(s->flags);
  646. nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS);
  647. if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
  648. if (!qemu_get_counted_string(f, s->node_name)) {
  649. error_report("Unable to read node name string");
  650. return -EINVAL;
  651. }
  652. if (!s->cancelled) {
  653. s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
  654. if (!s->bs) {
  655. error_report_err(local_err);
  656. cancel_incoming_locked(s);
  657. }
  658. }
  659. } else if (!s->bs && !nothing && !s->cancelled) {
  660. error_report("Error: block device name is not set");
  661. cancel_incoming_locked(s);
  662. }
  663. if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
  664. if (!qemu_get_counted_string(f, s->bitmap_name)) {
  665. error_report("Unable to read bitmap name string");
  666. return -EINVAL;
  667. }
  668. if (!s->cancelled) {
  669. s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
  670. /*
  671. * bitmap may be NULL here, it wouldn't be an error if it is the
  672. * first occurrence of the bitmap
  673. */
  674. if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
  675. error_report("Error: unknown dirty bitmap "
  676. "'%s' for block device '%s'",
  677. s->bitmap_name, s->node_name);
  678. cancel_incoming_locked(s);
  679. }
  680. }
  681. } else if (!s->bitmap && !nothing && !s->cancelled) {
  682. error_report("Error: block device name is not set");
  683. cancel_incoming_locked(s);
  684. }
  685. return 0;
  686. }
/*
 * dirty_bitmap_load
 *
 * Load sequence of dirty bitmap chunks. Return error only on fatal io stream
 * violations. On other errors just cancel bitmaps incoming migration and return
 * 0.
 *
 * Note, that when incoming bitmap migration is canceled, we still must read all
 * our chunks (and just ignore them), to not affect other migration objects.
 */
static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
{
    DBMLoadState *s = &((DBMState *)opaque)->load;
    int ret = 0;

    trace_dirty_bitmap_load_enter();

    if (version_id != 1) {
        /* Unknown stream version: nothing we could parse; cancel and fail */
        QEMU_LOCK_GUARD(&s->lock);
        cancel_incoming_locked(s);
        return -EINVAL;
    }

    do {
        /* The lock guard spans exactly one loop iteration (one chunk) */
        QEMU_LOCK_GUARD(&s->lock);

        ret = dirty_bitmap_load_header(f, s);
        if (ret < 0) {
            cancel_incoming_locked(s);
            return ret;
        }

        if (s->flags & DIRTY_BITMAP_MIG_FLAG_START) {
            ret = dirty_bitmap_load_start(f, s);
        } else if (s->flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
            dirty_bitmap_load_complete(f, s);
        } else if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
            ret = dirty_bitmap_load_bits(f, s);
        }

        /* Also surface any error latched on the stream itself */
        if (!ret) {
            ret = qemu_file_get_error(f);
        }

        if (ret) {
            cancel_incoming_locked(s);
            return ret;
        }
    } while (!(s->flags & DIRTY_BITMAP_MIG_FLAG_EOS));

    trace_dirty_bitmap_load_success();
    return 0;
}
  732. static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
  733. {
  734. DBMSaveState *s = &((DBMState *)opaque)->save;
  735. SaveBitmapState *dbms = NULL;
  736. if (init_dirty_bitmap_migration(s) < 0) {
  737. return -1;
  738. }
  739. QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) {
  740. send_bitmap_start(f, s, dbms);
  741. }
  742. qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
  743. return 0;
  744. }
  745. static bool dirty_bitmap_is_active(void *opaque)
  746. {
  747. DBMSaveState *s = &((DBMState *)opaque)->save;
  748. return migrate_dirty_bitmaps() && !s->no_bitmaps;
  749. }
  750. static bool dirty_bitmap_is_active_iterate(void *opaque)
  751. {
  752. return dirty_bitmap_is_active(opaque) && !runstate_is_running();
  753. }
  754. static bool dirty_bitmap_has_postcopy(void *opaque)
  755. {
  756. return true;
  757. }
/* SaveVMHandlers vtable registered for the "dirty-bitmap" section */
static SaveVMHandlers savevm_dirty_bitmap_handlers = {
    .save_setup = dirty_bitmap_save_setup,
    .save_live_complete_postcopy = dirty_bitmap_save_complete,
    .save_live_complete_precopy = dirty_bitmap_save_complete,
    .has_postcopy = dirty_bitmap_has_postcopy,
    .save_live_pending = dirty_bitmap_save_pending,
    .save_live_iterate = dirty_bitmap_save_iterate,
    .is_active_iterate = dirty_bitmap_is_active_iterate,
    .load_state = dirty_bitmap_load,
    .save_cleanup = dirty_bitmap_save_cleanup,
    .is_active = dirty_bitmap_is_active,
};
  770. void dirty_bitmap_mig_init(void)
  771. {
  772. QSIMPLEQ_INIT(&dbm_state.save.dbms_list);
  773. qemu_mutex_init(&dbm_state.load.lock);
  774. register_savevm_live("dirty-bitmap", 0, 1,
  775. &savevm_dirty_bitmap_handlers,
  776. &dbm_state);
  777. }