Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block patches

# gpg: Signature made Tue Apr 28 15:35:05 2015 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (76 commits)
  block: move I/O request processing to block/io.c
  block: extract bdrv_setup_io_funcs()
  block: add bdrv_set_dirty()/bdrv_reset_dirty() to block_int.h
  block: replace bdrv_states iteration with bdrv_next()
  vmdk: Widen before shifting 32 bit header field
  block/dmg: make it modular
  block/mirror: Always call block_job_sleep_ns()
  iotests: add incremental backup granularity tests
  iotests: add incremental backup failure recovery test
  iotests: add simple incremental backup case
  iotests: add QMP event waiting queue
  iotests: add invalid input incremental backup tests
  hbitmap: truncate tests
  block: Resize bitmaps on bdrv_truncate
  block: Ensure consistent bitmap function prototypes
  block: add BdrvDirtyBitmap documentation
  qmp: Add dirty bitmap status field in query-block
  qmp: add block-dirty-bitmap-clear
  qmp: Add support of "dirty-bitmap" sync mode for drive-backup
  block: Add bitmap successors
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell committed 10 years ago
Parent commit: a9392bc93c
91 changed files with 5785 additions and 1962 deletions
  1. MAINTAINERS (+12 -0)
  2. aio-posix.c (+66 -21)
  3. aio-win32.c (+8 -0)
  4. async.c (+1 -9)
  5. block.c (+66 -1360)
  6. block/Makefile.objs (+3 -2)
  7. block/backup.c (+133 -22)
  8. block/blkdebug.c (+6 -0)
  9. block/block-backend.c (+11 -0)
  10. block/io.c (+2540 -0)
  11. block/iscsi.c (+44 -20)
  12. block/mirror.c (+20 -33)
  13. block/null.c (+60 -6)
  14. block/qapi.c (+25 -21)
  15. block/qcow.c (+4 -4)
  16. block/qcow2-refcount.c (+1 -1)
  17. block/qcow2-snapshot.c (+2 -4)
  18. block/qcow2.c (+9 -5)
  19. block/qed.c (+3 -3)
  20. block/quorum.c (+1 -4)
  21. block/rbd.c (+1 -1)
  22. block/sheepdog.c (+1 -1)
  23. block/snapshot.c (+6 -6)
  24. block/vdi.c (+3 -3)
  25. block/vhdx.c (+5 -5)
  26. block/vmdk.c (+5 -5)
  27. block/vpc.c (+3 -3)
  28. block/vvfat.c (+4 -3)
  29. blockdev.c (+184 -11)
  30. blockjob.c (+17 -6)
  31. docs/bitmaps.md (+352 -0)
  32. docs/qmp/qmp-events.txt (+13 -8)
  33. hmp.c (+3 -3)
  34. hw/acpi/pcihp.c (+1 -1)
  35. hw/arm/nseries.c (+4 -1)
  36. hw/arm/omap1.c (+2 -4)
  37. hw/arm/pxa2xx_gpio.c (+1 -1)
  38. hw/arm/strongarm.c (+2 -2)
  39. hw/block/m25p80.c (+4 -1)
  40. hw/bt/sdp.c (+1 -1)
  41. hw/char/virtio-serial-bus.c (+4 -4)
  42. hw/display/tc6393xb.c (+1 -1)
  43. hw/gpio/max7310.c (+1 -1)
  44. hw/gpio/omap_gpio.c (+5 -8)
  45. hw/gpio/zaurus.c (+1 -1)
  46. hw/i2c/omap_i2c.c (+7 -3)
  47. hw/intc/allwinner-a10-pic.c (+4 -4)
  48. hw/intc/omap_intc.c (+5 -4)
  49. hw/pci-host/bonito.c (+1 -1)
  50. hw/pci-host/uninorth.c (+4 -1)
  51. hw/pci/msi.c (+6 -6)
  52. hw/pci/pcie_aer.c (+1 -1)
  53. hw/pci/shpc.c (+5 -5)
  54. hw/pci/slotid_cap.c (+1 -1)
  55. hw/ppc/ppce500_spin.c (+1 -1)
  56. hw/scsi/megasas.c (+1 -1)
  57. hw/sd/sd.c (+2 -1)
  58. include/block/aio.h (+7 -9)
  59. include/block/block.h (+29 -7)
  60. include/block/block_int.h (+15 -1)
  61. include/block/blockjob.h (+18 -4)
  62. include/block/qapi.h (+1 -1)
  63. include/hw/pci/pci.h (+8 -8)
  64. include/hw/pci/pcie_regs.h (+9 -9)
  65. include/qapi/qmp/qerror.h (+0 -6)
  66. include/qemu/hbitmap.h (+23 -0)
  67. include/standard-headers/linux/virtio_blk.h (+6 -2)
  68. include/sysemu/block-backend.h (+2 -0)
  69. include/sysemu/os-win32.h (+0 -3)
  70. iothread.c (+2 -9)
  71. kvm-all.c (+4 -4)
  72. migration/block.c (+4 -5)
  73. qapi/block-core.json (+101 -12)
  74. qemu-img.c (+310 -206)
  75. qmp-commands.hx (+127 -3)
  76. scripts/checkpatch.pl (+11 -0)
  77. scripts/qemu-gdb.py (+75 -0)
  78. scripts/qmp/qmp.py (+65 -30)
  79. target-ppc/cpu.h (+2 -2)
  80. tests/qemu-iotests/122 (+223 -0)
  81. tests/qemu-iotests/122.out (+209 -0)
  82. tests/qemu-iotests/124 (+363 -0)
  83. tests/qemu-iotests/124.out (+5 -0)
  84. tests/qemu-iotests/129 (+86 -0)
  85. tests/qemu-iotests/129.out (+5 -0)
  86. tests/qemu-iotests/group (+3 -0)
  87. tests/qemu-iotests/iotests.py (+38 -0)
  88. tests/test-aio.c (+11 -8)
  89. tests/test-hbitmap.c (+255 -0)
  90. thread-pool.c (+6 -8)
  91. util/hbitmap.c (+85 -0)

+ 12 - 0
MAINTAINERS

@@ -1182,7 +1182,19 @@ S: Supported
 F: block/gluster.c
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block
 
+Null Block Driver
+M: Fam Zheng <famz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/null.c
+
 Bootdevice
 M: Gonglei <arei.gonglei@huawei.com>
 S: Maintained
 F: bootdevice.c
+
+Quorum
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/quorum.c
+L: qemu-block@nongnu.org

+ 66 - 21
aio-posix.c

@@ -24,7 +24,6 @@ struct AioHandler
     IOHandler *io_read;
     IOHandler *io_write;
     int deleted;
-    int pollfds_idx;
     void *opaque;
     QLIST_ENTRY(AioHandler) node;
 };
@@ -83,7 +82,6 @@ void aio_set_fd_handler(AioContext *ctx,
         node->io_read = io_read;
         node->io_write = io_write;
         node->opaque = opaque;
-        node->pollfds_idx = -1;
 
         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -186,13 +184,61 @@ bool aio_dispatch(AioContext *ctx)
     return progress;
 }
 
+/* These thread-local variables are used only in a small part of aio_poll
+ * around the call to the poll() system call.  In particular they are not
+ * used while aio_poll is performing callbacks, which makes it much easier
+ * to think about reentrancy!
+ *
+ * Stack-allocated arrays would be perfect but they have size limitations;
+ * heap allocation is expensive enough that we want to reuse arrays across
+ * calls to aio_poll().  And because poll() has to be called without holding
+ * any lock, the arrays cannot be stored in AioContext.  Thread-local data
+ * has none of the disadvantages of these three options.
+ */
+static __thread GPollFD *pollfds;
+static __thread AioHandler **nodes;
+static __thread unsigned npfd, nalloc;
+static __thread Notifier pollfds_cleanup_notifier;
+
+static void pollfds_cleanup(Notifier *n, void *unused)
+{
+    g_assert(npfd == 0);
+    g_free(pollfds);
+    g_free(nodes);
+    nalloc = 0;
+}
+
+static void add_pollfd(AioHandler *node)
+{
+    if (npfd == nalloc) {
+        if (nalloc == 0) {
+            pollfds_cleanup_notifier.notify = pollfds_cleanup;
+            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
+            nalloc = 8;
+        } else {
+            g_assert(nalloc <= INT_MAX);
+            nalloc *= 2;
+        }
+        pollfds = g_renew(GPollFD, pollfds, nalloc);
+        nodes = g_renew(AioHandler *, nodes, nalloc);
+    }
+    nodes[npfd] = node;
+    pollfds[npfd] = (GPollFD) {
+        .fd = node->pfd.fd,
+        .events = node->pfd.events,
+    };
+    npfd++;
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandler *node;
     bool was_dispatching;
-    int ret;
+    int i, ret;
     bool progress;
+    int64_t timeout;
 
+    aio_context_acquire(ctx);
     was_dispatching = ctx->dispatching;
     progress = false;
 
@@ -210,39 +256,36 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     ctx->walking_handlers++;
 
-    g_array_set_size(ctx->pollfds, 0);
+    assert(npfd == 0);
 
     /* fill pollfds */
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        node->pollfds_idx = -1;
         if (!node->deleted && node->pfd.events) {
-            GPollFD pfd = {
-                .fd = node->pfd.fd,
-                .events = node->pfd.events,
-            };
-            node->pollfds_idx = ctx->pollfds->len;
-            g_array_append_val(ctx->pollfds, pfd);
+            add_pollfd(node);
         }
     }
 
-    ctx->walking_handlers--;
+    timeout = blocking ? aio_compute_timeout(ctx) : 0;
 
     /* wait until next event */
-    ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
-                         ctx->pollfds->len,
-                         blocking ? aio_compute_timeout(ctx) : 0);
+    if (timeout) {
+        aio_context_release(ctx);
+    }
+    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
+    if (timeout) {
+        aio_context_acquire(ctx);
+    }
 
     /* if we have any readable fds, dispatch event */
    if (ret > 0) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-            if (node->pollfds_idx != -1) {
-                GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
-                                              node->pollfds_idx);
-                node->pfd.revents = pfd->revents;
-            }
+        for (i = 0; i < npfd; i++) {
+            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }
 
+    npfd = 0;
+    ctx->walking_handlers--;
+
     /* Run dispatch even if there were no readable fds to run timers */
     aio_set_dispatching(ctx, true);
     if (aio_dispatch(ctx)) {
@@ -250,5 +293,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     aio_set_dispatching(ctx, was_dispatching);
+    aio_context_release(ctx);
+
     return progress;
 }

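The aio-posix.c hunk above replaces the pollfds GArray stored in AioContext with thread-local arrays and makes aio_poll() take the AioContext lock itself, releasing it only around the blocking poll(). A minimal caller sketch under that new discipline (aio_context_new() and aio_poll() are the real entry points shown in these diffs; the flush loop itself and the use of &error_abort are illustrative assumptions):

    /* Hedged sketch: flush everything pending on a context.  aio_poll()
     * now acquires/releases ctx internally, so the caller just loops
     * while progress is reported. */
    AioContext *ctx = aio_context_new(&error_abort);
    while (aio_poll(ctx, false)) {
        /* handlers or timers ran; poll again until quiescent */
    }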
+ 8 - 0
aio-win32.c

@@ -283,6 +283,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;
 
+    aio_context_acquire(ctx);
     have_select_revents = aio_prepare(ctx);
     if (have_select_revents) {
         blocking = false;
@@ -323,7 +324,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
         timeout = blocking
             ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
+        if (timeout) {
+            aio_context_release(ctx);
+        }
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+        if (timeout) {
+            aio_context_acquire(ctx);
+        }
         aio_set_dispatching(ctx, true);
 
         if (first && aio_bh_poll(ctx)) {
@@ -349,5 +356,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
     progress |= timerlistgroup_run_timers(&ctx->tlg);
 
     aio_set_dispatching(ctx, was_dispatching);
+    aio_context_release(ctx);
     return progress;
 }

+ 1 - 9
async.c

@@ -230,7 +230,6 @@ aio_ctx_finalize(GSource     *source)
     event_notifier_cleanup(&ctx->notifier);
     rfifolock_destroy(&ctx->lock);
     qemu_mutex_destroy(&ctx->bh_lock);
-    g_array_free(ctx->pollfds, TRUE);
     timerlistgroup_deinit(&ctx->tlg);
 }
 
@@ -281,12 +280,6 @@ static void aio_timerlist_notify(void *opaque)
     aio_notify(opaque);
 }
 
-static void aio_rfifolock_cb(void *opaque)
-{
-    /* Kick owner thread in case they are blocked in aio_poll() */
-    aio_notify(opaque);
-}
-
 AioContext *aio_context_new(Error **errp)
 {
     int ret;
@@ -302,10 +295,9 @@ AioContext *aio_context_new(Error **errp)
     aio_set_event_notifier(ctx, &ctx->notifier,
                            (EventNotifierHandler *)
                            event_notifier_test_and_clear);
-    ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
     ctx->thread_pool = NULL;
     qemu_mutex_init(&ctx->bh_lock);
-    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
+    rfifolock_init(&ctx->lock, NULL, NULL);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
     return ctx;

+ 66 - 1360
block.c

(Diff not shown: the change to this file is too large to display.)

+ 3 - 2
block/Makefile.objs

@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o
+block-obj-y += null.o mirror.o io.o
 
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
@@ -37,6 +37,7 @@ gluster.o-libs     := $(GLUSTERFS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 archipelago.o-libs := $(ARCHIPELAGO_LIBS)
+block-obj-m        += dmg.o
 dmg.o-libs         := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
 linux-aio.o-libs   := -laio

+ 133 - 22
block/backup.c

@@ -37,6 +37,8 @@ typedef struct CowRequest {
 typedef struct BackupBlockJob {
     BlockJob common;
     BlockDriverState *target;
+    /* bitmap for sync=dirty-bitmap */
+    BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
     RateLimit limit;
     BlockdevOnError on_source_error;
@@ -242,6 +244,91 @@ static void backup_complete(BlockJob *job, void *opaque)
     g_free(data);
 }
 
+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+{
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    /* we need to yield so that bdrv_drain_all() returns.
+     * (without, VM does not reboot)
+     */
+    if (job->common.speed) {
+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
+                                                      job->sectors_read);
+        job->sectors_read = 0;
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+    } else {
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+    }
+
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    return false;
+}
+
+static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
+{
+    bool error_is_read;
+    int ret = 0;
+    int clusters_per_iter;
+    uint32_t granularity;
+    int64_t sector;
+    int64_t cluster;
+    int64_t end;
+    int64_t last_cluster = -1;
+    BlockDriverState *bs = job->common.bs;
+    HBitmapIter hbi;
+
+    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
+    clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
+    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
+
+    /* Find the next dirty sector(s) */
+    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+        cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+
+        /* Fake progress updates for any clusters we skipped */
+        if (cluster != last_cluster + 1) {
+            job->common.offset += ((cluster - last_cluster - 1) *
+                                   BACKUP_CLUSTER_SIZE);
+        }
+
+        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
+            do {
+                if (yield_and_check(job)) {
+                    return ret;
+                }
+                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+                if ((ret < 0) &&
+                    backup_error_action(job, error_is_read, -ret) ==
+                    BLOCK_ERROR_ACTION_REPORT) {
+                    return ret;
+                }
+            } while (ret < 0);
+        }
+
+        /* If the bitmap granularity is smaller than the backup granularity,
+         * we need to advance the iterator pointer to the next cluster. */
+        if (granularity < BACKUP_CLUSTER_SIZE) {
+            bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
+        }
+
+        last_cluster = cluster - 1;
+    }
+
+    /* Play some final catchup with the progress meter */
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    if (last_cluster + 1 < end) {
+        job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
+    }
+
+    return ret;
+}
+
 static void coroutine_fn backup_run(void *opaque)
 {
     BackupBlockJob *job = opaque;
@@ -259,8 +346,7 @@ static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_init(&job->flush_rwlock);
 
     start = 0;
-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
-                       BACKUP_SECTORS_PER_CLUSTER);
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
 
     job->bitmap = hbitmap_alloc(end, 0);
 
@@ -278,28 +364,13 @@ static void coroutine_fn backup_run(void *opaque)
             qemu_coroutine_yield();
             job->common.busy = true;
         }
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        ret = backup_run_incremental(job);
     } else {
         /* Both FULL and TOP SYNC_MODE's require copying.. */
         for (; start < end; start++) {
             bool error_is_read;
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            /* we need to yield so that qemu_aio_flush() returns.
-             * (without, VM does not reboot)
-             */
-            if (job->common.speed) {
-                uint64_t delay_ns = ratelimit_calculate_delay(
-                        &job->limit, job->sectors_read);
-                job->sectors_read = 0;
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-            } else {
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-            }
-
-            if (block_job_is_cancelled(&job->common)) {
+            if (yield_and_check(job)) {
                 break;
             }
 
@@ -357,6 +428,18 @@ static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
 
+    if (job->sync_bitmap) {
+        BdrvDirtyBitmap *bm;
+        if (ret < 0) {
+            /* Merge the successor back into the parent, delete nothing. */
+            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+            assert(bm);
+        } else {
+            /* Everything is fine, delete this bitmap and install the backup. */
+            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+            assert(bm);
+        }
+    }
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
@@ -369,6 +452,7 @@ static void coroutine_fn backup_run(void *opaque)
 
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
@@ -412,17 +496,36 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        if (!sync_bitmap) {
+            error_setg(errp, "must provide a valid bitmap name for "
+                             "\"dirty-bitmap\" sync mode");
+            return;
+        }
+
+        /* Create a new bitmap, and freeze/disable this one. */
+        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
+            return;
+        }
+    } else if (sync_bitmap) {
+        error_setg(errp,
+                   "a sync_bitmap was provided to backup_run, "
+                   "but received an incompatible sync_mode (%s)",
+                   MirrorSyncMode_lookup[sync_mode]);
+        return;
+    }
+
     len = bdrv_getlength(bs);
     if (len < 0) {
         error_setg_errno(errp, -len, "unable to get length for '%s'",
                          bdrv_get_device_name(bs));
-        return;
+        goto error;
     }
 
     BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
                                            cb, opaque, errp);
     if (!job) {
-        return;
+        goto error;
     }
 
     bdrv_op_block_all(target, job->common.blocker);
@@ -431,7 +534,15 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->on_target_error = on_target_error;
     job->target = target;
     job->sync_mode = sync_mode;
+    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
+                       sync_bitmap : NULL;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
     qemu_coroutine_enter(job->common.co, job);
+    return;
+
+ error:
+    if (sync_bitmap) {
+        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
+    }
 }

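With this change backup_start() grows a sync_bitmap parameter for the new MIRROR_SYNC_MODE_DIRTY_BITMAP mode; it freezes the bitmap via bdrv_dirty_bitmap_create_successor() for the duration of the job and reclaims it on failure. A hedged caller sketch, assuming the QMP layer resolves the bitmap by name with bdrv_find_dirty_bitmap() (the bitmap name, cb/opaque, and the error handling shown are illustrative, not the actual blockdev.c code):

    BdrvDirtyBitmap *bmap = bdrv_find_dirty_bitmap(bs, "bitmap0");
    if (!bmap) {
        error_setg(errp, "Dirty bitmap 'bitmap0' not found");
        return;
    }
    backup_start(bs, target, 0 /* speed */, MIRROR_SYNC_MODE_DIRTY_BITMAP,
                 bmap, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
                 cb, opaque, errp);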
+ 6 - 0
block/blkdebug.c

@@ -721,6 +721,11 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
     return bdrv_getlength(bs->file);
 }
 
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
+{
+    return bdrv_truncate(bs->file, offset);
+}
+
 static void blkdebug_refresh_filename(BlockDriverState *bs)
 {
     QDict *opts;
@@ -779,6 +784,7 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_file_open         = blkdebug_open,
     .bdrv_close             = blkdebug_close,
     .bdrv_getlength         = blkdebug_getlength,
+    .bdrv_truncate          = blkdebug_truncate,
     .bdrv_refresh_filename  = blkdebug_refresh_filename,
 
     .bdrv_aio_readv         = blkdebug_aio_readv,

+ 11 - 0
block/block-backend.c

@@ -515,6 +515,17 @@ int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
     return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
 }
 
+int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
+                     int nb_sectors, BdrvRequestFlags flags)
+{
+    int ret = blk_check_request(blk, sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+}
+
 static void error_callback_bh(void *opaque)
 {
     struct BlockBackendAIOCB *acb = opaque;

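blk_write_zeroes() gives BlockBackend users the same zero-write path as bdrv_write_zeroes(). A short hedged usage sketch (the BlockBackend *blk and the range are illustrative):

    /* Zero the first 1 MiB (2048 512-byte sectors), letting the driver
     * unmap instead of writing literal zeroes where it can. */
    int ret = blk_write_zeroes(blk, 0, 2048, BDRV_REQ_MAY_UNMAP);
    if (ret < 0) {
        error_report("write_zeroes failed: %s", strerror(-ret));
    }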
+ 2540 - 0
block/io.c

@@ -0,0 +1,2540 @@
+/*
+ * Block layer I/O functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "trace.h"
+#include "sysemu/qtest.h"
+#include "block/blockjob.h"
+#include "block/block_int.h"
+
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque);
+static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
+static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BdrvRequestFlags flags,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque,
+                                         bool is_write);
+static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+
+/* throttling disk I/O limits */
+void bdrv_set_io_limits(BlockDriverState *bs,
+                        ThrottleConfig *cfg)
+{
+    int i;
+
+    throttle_config(&bs->throttle_state, cfg);
+
+    for (i = 0; i < 2; i++) {
+        qemu_co_enter_next(&bs->throttled_reqs[i]);
+    }
+}
+
+/* this function drains all the throttled I/Os */
+static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
+{
+    bool drained = false;
+    bool enabled = bs->io_limits_enabled;
+    int i;
+
+    bs->io_limits_enabled = false;
+
+    for (i = 0; i < 2; i++) {
+        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+            drained = true;
+        }
+    }
+
+    bs->io_limits_enabled = enabled;
+
+    return drained;
+}
+
+void bdrv_io_limits_disable(BlockDriverState *bs)
+{
+    bs->io_limits_enabled = false;
+
+    bdrv_start_throttled_reqs(bs);
+
+    throttle_destroy(&bs->throttle_state);
+}
+
+static void bdrv_throttle_read_timer_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    qemu_co_enter_next(&bs->throttled_reqs[0]);
+}
+
+static void bdrv_throttle_write_timer_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    qemu_co_enter_next(&bs->throttled_reqs[1]);
+}
+
+/* should be called before bdrv_set_io_limits if a limit is set */
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+    int clock_type = QEMU_CLOCK_REALTIME;
+
+    if (qtest_enabled()) {
+        /* For testing block IO throttling only */
+        clock_type = QEMU_CLOCK_VIRTUAL;
+    }
+    assert(!bs->io_limits_enabled);
+    throttle_init(&bs->throttle_state,
+                  bdrv_get_aio_context(bs),
+                  clock_type,
+                  bdrv_throttle_read_timer_cb,
+                  bdrv_throttle_write_timer_cb,
+                  bs);
+    bs->io_limits_enabled = true;
+}
+
+/* This function makes an IO wait if needed
+ *
+ * @nb_sectors: the number of sectors of the IO
+ * @is_write:   is the IO a write
+ */
+static void bdrv_io_limits_intercept(BlockDriverState *bs,
+                                     unsigned int bytes,
+                                     bool is_write)
+{
+    /* must this I/O wait? */
+    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
+
+    /* if it must wait, or any request of this type is already throttled, queue the I/O */
+    if (must_wait ||
+        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
+        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+    }
+
+    /* the IO will be executed, do the accounting */
+    throttle_account(&bs->throttle_state, is_write, bytes);
+
+
+    /* if the next request must wait -> do nothing */
+    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
+        return;
+    }
+
+    /* else queue next request for execution */
+    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
+}
+
+void bdrv_setup_io_funcs(BlockDriver *bdrv)
+{
+    /* Block drivers without coroutine functions need emulation */
+    if (!bdrv->bdrv_co_readv) {
+        bdrv->bdrv_co_readv = bdrv_co_readv_em;
+        bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
+         * the block driver lacks aio we need to emulate that too.
+         */
+        if (!bdrv->bdrv_aio_readv) {
+            /* add AIO emulation layer */
+            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+        }
+    }
+}
+
+void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BlockDriver *drv = bs->drv;
+    Error *local_err = NULL;
+
+    memset(&bs->bl, 0, sizeof(bs->bl));
+
+    if (!drv) {
+        return;
+    }
+
+    /* Take some limits from the children as a default */
+    if (bs->file) {
+        bdrv_refresh_limits(bs->file, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
+        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
+        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+    } else {
+        bs->bl.opt_mem_alignment = 512;
+    }
+
+    if (bs->backing_hd) {
+        bdrv_refresh_limits(bs->backing_hd, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+        bs->bl.opt_transfer_length =
+            MAX(bs->bl.opt_transfer_length,
+                bs->backing_hd->bl.opt_transfer_length);
+        bs->bl.max_transfer_length =
+            MIN_NON_ZERO(bs->bl.max_transfer_length,
+                         bs->backing_hd->bl.max_transfer_length);
+        bs->bl.opt_mem_alignment =
+            MAX(bs->bl.opt_mem_alignment,
+                bs->backing_hd->bl.opt_mem_alignment);
+    }
+
+    /* Then let the driver override it */
+    if (drv->bdrv_refresh_limits) {
+        drv->bdrv_refresh_limits(bs, errp);
+    }
+}
+
+/**
+ * The copy-on-read flag is actually a reference count so multiple users may
+ * use the feature without worrying about clobbering its previous state.
+ * Copy-on-read stays enabled until all users have called to disable it.
+ */
+void bdrv_enable_copy_on_read(BlockDriverState *bs)
+{
+    bs->copy_on_read++;
+}
+
+void bdrv_disable_copy_on_read(BlockDriverState *bs)
+{
+    assert(bs->copy_on_read > 0);
+    bs->copy_on_read--;
+}
+
+/* Check if any requests are in-flight (including throttled requests) */
+static bool bdrv_requests_pending(BlockDriverState *bs)
+{
+    if (!QLIST_EMPTY(&bs->tracked_requests)) {
+        return true;
+    }
+    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
+        return true;
+    }
+    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
+        return true;
+    }
+    if (bs->file && bdrv_requests_pending(bs->file)) {
+        return true;
+    }
+    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
+        return true;
+    }
+    return false;
+}
+
+static bool bdrv_drain_one(BlockDriverState *bs)
+{
+    bool bs_busy;
+
+    bdrv_flush_io_queue(bs);
+    bdrv_start_throttled_reqs(bs);
+    bs_busy = bdrv_requests_pending(bs);
+    bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
+    return bs_busy;
+}
+
+/*
+ * Wait for pending requests to complete on a single BlockDriverState subtree
+ *
+ * See the warning in bdrv_drain_all().  This function can only be called if
+ * you are sure nothing can generate I/O because you have op blockers
+ * installed.
+ *
+ * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
+ * AioContext.
+ */
+void bdrv_drain(BlockDriverState *bs)
+{
+    while (bdrv_drain_one(bs)) {
+        /* Keep iterating */
+    }
+}
+
+/*
+ * Wait for pending requests to complete across all BlockDriverStates
+ *
+ * This function does not flush data to disk, use bdrv_flush_all() for that
+ * after calling this function.
+ *
+ * Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a coroutine
+ * can be arbitrarily complex and a constant flow of I/O can come until the
+ * coroutine is complete.  Because of this, it is not possible to have a
+ * function to drain a single device's I/O queue.
+ */
+void bdrv_drain_all(void)
+{
+    /* Always run first iteration so any pending completion BHs run */
+    bool busy = true;
+    BlockDriverState *bs = NULL;
+
+    while ((bs = bdrv_next(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
+        if (bs->job) {
+            block_job_pause(bs->job);
+        }
+        aio_context_release(aio_context);
+    }
+
+    while (busy) {
+        busy = false;
+        bs = NULL;
+
+        while ((bs = bdrv_next(bs))) {
+            AioContext *aio_context = bdrv_get_aio_context(bs);
+
+            aio_context_acquire(aio_context);
+            busy |= bdrv_drain_one(bs);
+            aio_context_release(aio_context);
+        }
+    }
+
+    bs = NULL;
+    while ((bs = bdrv_next(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
+        if (bs->job) {
+            block_job_resume(bs->job);
+        }
+        aio_context_release(aio_context);
+    }
+}
+
+/**
+ * Remove an active request from the tracked requests list
+ *
+ * This function should be called when a tracked request is completing.
+ */
+static void tracked_request_end(BdrvTrackedRequest *req)
+{
+    if (req->serialising) {
+        req->bs->serialising_in_flight--;
+    }
+
+    QLIST_REMOVE(req, list);
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+/**
+ * Add an active request to the tracked requests list
+ */
+static void tracked_request_begin(BdrvTrackedRequest *req,
+                                  BlockDriverState *bs,
+                                  int64_t offset,
+                                  unsigned int bytes, bool is_write)
+{
+    *req = (BdrvTrackedRequest){
+        .bs = bs,
+        .offset         = offset,
+        .bytes          = bytes,
+        .is_write       = is_write,
+        .co             = qemu_coroutine_self(),
+        .serialising    = false,
+        .overlap_offset = offset,
+        .overlap_bytes  = bytes,
+    };
+
+    qemu_co_queue_init(&req->wait_queue);
+
+    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+}
+
+static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+{
+    int64_t overlap_offset = req->offset & ~(align - 1);
+    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
+                               - overlap_offset;
+
+    if (!req->serialising) {
+        req->bs->serialising_in_flight++;
+        req->serialising = true;
+    }
+
+    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
+    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
+}
+
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors)
+{
+    BlockDriverInfo bdi;
+
+    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+        *cluster_sector_num = sector_num;
+        *cluster_nb_sectors = nb_sectors;
+    } else {
+        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
+        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
+        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
+                                            nb_sectors, c);
+    }
+}
+
+static int bdrv_get_cluster_size(BlockDriverState *bs)
+{
+    BlockDriverInfo bdi;
+    int ret;
+
+    ret = bdrv_get_info(bs, &bdi);
+    if (ret < 0 || bdi.cluster_size == 0) {
+        return bs->request_alignment;
+    } else {
+        return bdi.cluster_size;
+    }
+}
+
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
+                                     int64_t offset, unsigned int bytes)
+{
+    /*        aaaa   bbbb */
+    if (offset >= req->overlap_offset + req->overlap_bytes) {
+        return false;
+    }
+    /* bbbb   aaaa        */
+    if (req->overlap_offset >= offset + bytes) {
+        return false;
+    }
+    return true;
+}
+
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+{
+    BlockDriverState *bs = self->bs;
+    BdrvTrackedRequest *req;
+    bool retry;
+    bool waited = false;
+
+    if (!bs->serialising_in_flight) {
+        return false;
+    }
+
+    do {
+        retry = false;
+        QLIST_FOREACH(req, &bs->tracked_requests, list) {
+            if (req == self || (!req->serialising && !self->serialising)) {
+                continue;
+            }
+            if (tracked_request_overlaps(req, self->overlap_offset,
+                                         self->overlap_bytes))
+            {
+                /* Hitting this means there was a reentrant request, for
+                 * example, a block driver issuing nested requests.  This must
+                 * never happen since it means deadlock.
+                 */
+                assert(qemu_coroutine_self() != req->co);
+
+                /* If the request is already (indirectly) waiting for us, or
+                 * will wait for us as soon as it wakes up, then just go on
+                 * (instead of producing a deadlock in the former case). */
+                if (!req->waiting_for) {
+                    self->waiting_for = req;
+                    qemu_co_queue_wait(&req->wait_queue);
+                    self->waiting_for = NULL;
+                    retry = true;
+                    waited = true;
+                    break;
+                }
+            }
+        }
+    } while (retry);
+
+    return waited;
+}
+
+static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
+                                   size_t size)
+{
+    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
+        return -EIO;
+    }
+
+    if (!bdrv_is_inserted(bs)) {
+        return -ENOMEDIUM;
+    }
+
+    if (offset < 0) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors)
+{
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EIO;
+    }
+
+    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
+                                   nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+typedef struct RwCo {
+    BlockDriverState *bs;
+    int64_t offset;
+    QEMUIOVector *qiov;
+    bool is_write;
+    int ret;
+    BdrvRequestFlags flags;
+} RwCo;
+
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
+{
+    RwCo *rwco = opaque;
+
+    if (!rwco->is_write) {
+        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
+                                      rwco->qiov->size, rwco->qiov,
+                                      rwco->flags);
+    } else {
+        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
+                                       rwco->qiov->size, rwco->qiov,
+                                       rwco->flags);
+    }
+}
+
+/*
+ * Process a vectored synchronous request using coroutines
+ */
+static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+                        QEMUIOVector *qiov, bool is_write,
+                        BdrvRequestFlags flags)
+{
+    Coroutine *co;
+    RwCo rwco = {
+        .bs = bs,
+        .offset = offset,
+        .qiov = qiov,
+        .is_write = is_write,
+        .ret = NOT_DONE,
+        .flags = flags,
+    };
+
+    /**
+     * In sync call context, when the vcpu is blocked, this throttling timer
+     * will not fire; so the I/O throttling function has to be disabled here
+     * if it has been enabled.
+     */
+    if (bs->io_limits_enabled) {
+        fprintf(stderr, "Disabling I/O throttling on '%s' due "
+                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
+        bdrv_io_limits_disable(bs);
+    }
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_rw_co_entry(&rwco);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_rw_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    }
+    return rwco.ret;
+}
+
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+    };
+
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EINVAL;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+                        &qiov, is_write, flags);
+}
+
+/* return < 0 if error. See bdrv_write() for the return codes */
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+              uint8_t *buf, int nb_sectors)
+{
+    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
+}
+
+/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+                          uint8_t *buf, int nb_sectors)
+{
+    bool enabled;
+    int ret;
+
+    enabled = bs->io_limits_enabled;
+    bs->io_limits_enabled = false;
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    bs->io_limits_enabled = enabled;
+    return ret;
+}
+
+/* Return < 0 if error. Important errors are:
+  -EIO         generic I/O error (may happen for all errors)
+  -ENOMEDIUM   No media inserted.
+  -EINVAL      Invalid sector number or nb_sectors
+  -EACCES      Trying to write a read-only device
+*/
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+               const uint8_t *buf, int nb_sectors)
+{
+    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+}
+
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+                      int nb_sectors, BdrvRequestFlags flags)
+{
+    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
+                      BDRV_REQ_ZERO_WRITE | flags);
+}
+
+/*
+ * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+{
+    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+    int n;
+
+    target_sectors = bdrv_nb_sectors(bs);
+    if (target_sectors < 0) {
+        return target_sectors;
+    }
+
+    for (;;) {
+        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+        if (nb_sectors <= 0) {
+            return 0;
+        }
+        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
+        if (ret < 0) {
+            error_report("error getting block status at sector %" PRId64 ": %s",
+                         sector_num, strerror(-ret));
+            return ret;
+        }
+        if (ret & BDRV_BLOCK_ZERO) {
+            sector_num += n;
+            continue;
+        }
+        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+        if (ret < 0) {
+            error_report("error writing zeroes at sector %" PRId64 ": %s",
+                         sector_num, strerror(-ret));
+            return ret;
+        }
+        sector_num += n;
+    }
+}
+
+int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = bytes,
+    };
+    int ret;
+
+    if (bytes < 0) {
+        return -EINVAL;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bytes;
+}
+
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+    int ret;
+
+    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return qiov->size;
+}
+
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+                const void *buf, int bytes)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base   = (void *) buf,
+        .iov_len    = bytes,
+    };
+
+    if (bytes < 0) {
+        return -EINVAL;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    return bdrv_pwritev(bs, offset, &qiov);
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+    const void *buf, int count)
+{
+    int ret;
+
+    ret = bdrv_pwrite(bs, offset, buf, count);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* No flush needed for cache modes that already do it */
+    if (bs->enable_write_cache) {
+        bdrv_flush(bs);
+    }
+
+    return 0;
+}
+
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    /* Perform I/O through a temporary buffer so that users who scribble over
+     * their read buffer while the operation is in progress do not end up
+     * modifying the image file.  This is critical for zero-copy guest I/O
+     * where anything might happen inside guest memory.
+     */
+    void *bounce_buffer;
+
+    BlockDriver *drv = bs->drv;
+    struct iovec iov;
+    QEMUIOVector bounce_qiov;
+    int64_t cluster_sector_num;
+    int cluster_nb_sectors;
+    size_t skip_bytes;
+    int ret;
+
+    /* Cover entire cluster so no additional backing file I/O is required when
+     * allocating cluster in the image file.
+     */
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
+
+    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
+                                   cluster_sector_num, cluster_nb_sectors);
+
+    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+    if (bounce_buffer == NULL) {
+        ret = -ENOMEM;
+        goto err;
+    }
+
+    qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
+                             &bounce_qiov);
+    if (ret < 0) {
+        goto err;
+    }
+
+    if (drv->bdrv_co_write_zeroes &&
+        buffer_is_zero(bounce_buffer, iov.iov_len)) {
+        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
+                                      cluster_nb_sectors, 0);
+    } else {
+        /* This does not change the data on the disk, it is not necessary
+         * to flush even in cache=writethrough mode.
+         */
+        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
+                                  &bounce_qiov);
+    }
+
+    if (ret < 0) {
+        /* It might be okay to ignore write errors for guest requests.  If this
+         * is a deliberate copy-on-read then we don't want to ignore the error.
+         * Simply report it in all cases.
+         */
+        goto err;
+    }
+
+    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
+    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
+                        nb_sectors * BDRV_SECTOR_SIZE);
+
+err:
+    qemu_vfree(bounce_buffer);
+    return ret;
+}
+
+/*
+ * Forwards an already correctly aligned request to the BlockDriver. This
+ * handles copy on read and zeroing after EOF; any other features must be
+ * implemented by the caller.
+ */
+static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    int64_t align, QEMUIOVector *qiov, int flags)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(!qiov || bytes == qiov->size);
+
+    /* Handle Copy on Read and associated serialisation */
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        /* If we touch the same cluster it counts as an overlap.  This
+         * guarantees that allocating writes will be serialized and not race
+         * with each other for the same cluster.  For example, in copy-on-read
+         * it ensures that the CoR read and write operations are atomic and
+         * guest writes cannot interleave between them. */
+        mark_request_serialising(req, bdrv_get_cluster_size(bs));
+    }
+
+    wait_serialising_requests(req);
+
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        int pnum;
+
+        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+        if (ret < 0) {
+            goto out;
+        }
+
+        if (!ret || pnum != nb_sectors) {
+            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+            goto out;
+        }
+    }
+
+    /* Forward the request to the BlockDriver */
+    if (!bs->zero_beyond_eof) {
+        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+    } else {
+        /* Read zeros after EOF */
+        int64_t total_sectors, max_nb_sectors;
+
+        total_sectors = bdrv_nb_sectors(bs);
+        if (total_sectors < 0) {
+            ret = total_sectors;
+            goto out;
+        }
+
+        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
+                                  align >> BDRV_SECTOR_BITS);
+        if (nb_sectors < max_nb_sectors) {
+            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+        } else if (max_nb_sectors > 0) {
+            QEMUIOVector local_qiov;
+
+            qemu_iovec_init(&local_qiov, qiov->niov);
+            qemu_iovec_concat(&local_qiov, qiov, 0,
+                              max_nb_sectors * BDRV_SECTOR_SIZE);
+
+            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
+                                     &local_qiov);
+
+            qemu_iovec_destroy(&local_qiov);
+        } else {
+            ret = 0;
+        }
+
+        /* Reading beyond end of file is supposed to produce zeroes */
+        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
+            uint64_t offset = MAX(0, total_sectors - sector_num);
+            uint64_t bytes = (sector_num + nb_sectors - offset) *
+                              BDRV_SECTOR_SIZE;
+            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+        }
+    }
+
+out:
+    return ret;
+}
+
+static inline uint64_t bdrv_get_align(BlockDriverState *bs)
+{
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+}
+
+static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
+                                       int64_t offset, size_t bytes)
+{
+    int64_t align = bdrv_get_align(bs);
+    return !(offset & (align - 1) || (bytes & (align - 1)));
+}
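
A minimal standalone sketch (not part of the patch) of the power-of-two mask test that bdrv_get_align()/bdrv_req_is_aligned() rely on: when align is a power of two, (x & (align - 1)) equals x % align, so a zero mask means x is aligned.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* same predicate as bdrv_req_is_aligned(), with align passed in directly */
static bool req_is_aligned(int64_t offset, size_t bytes, int64_t align)
{
    return !((offset & (align - 1)) || (bytes & (align - 1)));
}

int main(void)
{
    printf("%d\n", req_is_aligned(8192, 4096, 4096)); /* 1: both aligned */
    printf("%d\n", req_is_aligned(8200, 4096, 4096)); /* 0: offset unaligned */
    return 0;
}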
+
+/*
+ * Handle a read request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvTrackedRequest req;
+
+    uint64_t align = bdrv_get_align(bs);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
+    int ret;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_check_byte_request(bs, offset, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (bs->copy_on_read) {
+        flags |= BDRV_REQ_COPY_ON_READ;
+    }
+
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, bytes, false);
+    }
+
+    /* Align read if necessary by padding qiov */
+    if (offset & (align - 1)) {
+        head_buf = qemu_blockalign(bs, align);
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+        tail_buf = qemu_blockalign(bs, align);
+        qemu_iovec_add(&local_qiov, tail_buf,
+                       align - ((offset + bytes) & (align - 1)));
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    tracked_request_begin(&req, bs, offset, bytes, false);
+    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+                              use_local_qiov ? &local_qiov : qiov,
+                              flags);
+    tracked_request_end(&req);
+
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+        qemu_vfree(head_buf);
+        qemu_vfree(tail_buf);
+    }
+
+    return ret;
+}
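
The head/tail padding above comes down to two mask operations plus one round-up. A worked sketch with assumed values (offset 1000, 2000 bytes, 512-byte alignment), not part of the patch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define ROUND_UP(n, m) ((((n) + (m) - 1) / (m)) * (m))

int main(void)
{
    int64_t offset = 1000;                               /* unaligned request */
    unsigned int bytes = 2000;
    int64_t align = 512;

    unsigned int head = offset & (align - 1);            /* 488 bytes of head padding */
    int64_t aligned_offset = offset & ~(align - 1);      /* 512 */
    unsigned int padded = ROUND_UP(head + bytes, align); /* 2560 (tail pad = 72) */

    printf("offset %" PRId64 " -> %" PRId64 ", bytes %u -> %u\n",
           offset, aligned_offset, bytes, padded);
    return 0;
}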
+
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
+                             nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+                            BDRV_REQ_COPY_ON_READ);
+}
+
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
+
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    BlockDriver *drv = bs->drv;
+    QEMUIOVector qiov;
+    struct iovec iov = {0};
+    int ret = 0;
+
+    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
+                                        BDRV_REQUEST_MAX_SECTORS);
+
+    while (nb_sectors > 0 && !ret) {
+        int num = nb_sectors;
+
+        /* Align request.  Block drivers can expect the "bulk" of the request
+         * to be aligned.
+         */
+        if (bs->bl.write_zeroes_alignment
+            && num > bs->bl.write_zeroes_alignment) {
+            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
+                /* Make a small request up to the first aligned sector.  */
+                num = bs->bl.write_zeroes_alignment;
+                num -= sector_num % bs->bl.write_zeroes_alignment;
+            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
+                /* Shorten the request to the last aligned sector.  num cannot
+                 * underflow because num > bs->bl.write_zeroes_alignment.
+                 */
+                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
+            }
+        }
+
+        /* limit request size */
+        if (num > max_write_zeroes) {
+            num = max_write_zeroes;
+        }
+
+        ret = -ENOTSUP;
+        /* First try the efficient write zeroes operation */
+        if (drv->bdrv_co_write_zeroes) {
+            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+        }
+
+        if (ret == -ENOTSUP) {
+            /* Fall back to bounce buffer if write zeroes is unsupported */
+            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+            num = MIN(num, max_xfer_len);
+            iov.iov_len = num * BDRV_SECTOR_SIZE;
+            if (iov.iov_base == NULL) {
+                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+                if (iov.iov_base == NULL) {
+                    ret = -ENOMEM;
+                    goto fail;
+                }
+                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
+            }
+            qemu_iovec_init_external(&qiov, &iov, 1);
+
+            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+
+            /* Keep the bounce buffer around if it is big enough for all
+             * future requests.
+             */
+            if (num < max_xfer_len) {
+                qemu_vfree(iov.iov_base);
+                iov.iov_base = NULL;
+            }
+        }
+
+        sector_num += num;
+        nb_sectors -= num;
+    }
+
+fail:
+    qemu_vfree(iov.iov_base);
+    return ret;
+}
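
A worked sketch (assumed values, not part of the patch) of the splitting loop above: with write_zeroes_alignment = 8 and a request covering sectors [5, 25), it emits a 3-sector head, a 16-sector aligned body, and a 1-sector tail.

#include <stdio.h>

int main(void)
{
    long sector_num = 5, nb_sectors = 20;
    const long alignment = 8;  /* stands in for bs->bl.write_zeroes_alignment */

    while (nb_sectors > 0) {
        long num = nb_sectors;
        if (alignment && num > alignment) {
            if (sector_num % alignment) {
                /* head fragment up to the first aligned sector */
                num = alignment - sector_num % alignment;
            } else if ((sector_num + num) % alignment) {
                /* shorten to the last aligned sector */
                num -= (sector_num + num) % alignment;
            }
        }
        printf("zero [%ld, %ld)\n", sector_num, sector_num + num);
        sector_num += num;
        nb_sectors -= num;
    }
    return 0;
}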
+
+/*
+ * Forwards an already correctly aligned write request to the BlockDriver.
+ */
+static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    QEMUIOVector *qiov, int flags)
+{
+    BlockDriver *drv = bs->drv;
+    bool waited;
+    int ret;
+
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(!qiov || bytes == qiov->size);
+
+    waited = wait_serialising_requests(req);
+    assert(!waited || !req->serialising);
+    assert(req->overlap_offset <= offset);
+    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+
+    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
+
+    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
+        qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
+    if (ret < 0) {
+        /* Do nothing, write notifier decided to fail this request */
+    } else if (flags & BDRV_REQ_ZERO_WRITE) {
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
+        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
+    } else {
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
+        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+    }
+    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
+
+    if (ret == 0 && !bs->enable_write_cache) {
+        ret = bdrv_co_flush(bs);
+    }
+
+    bdrv_set_dirty(bs, sector_num, nb_sectors);
+
+    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+
+    if (ret >= 0) {
+        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+    }
+
+    return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    BdrvTrackedRequest req;
+    uint64_t align = bdrv_get_align(bs);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
+    int ret;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+    if (bs->read_only) {
+        return -EACCES;
+    }
+
+    ret = bdrv_check_byte_request(bs, offset, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, bytes, true);
+    }
+
+    /*
+     * Align write if necessary by performing a read-modify-write cycle.
+     * Pad qiov with the read parts and be sure to have a tracked request not
+     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+     */
+    tracked_request_begin(&req, bs, offset, bytes, true);
+
+    if (offset & (align - 1)) {
+        QEMUIOVector head_qiov;
+        struct iovec head_iov;
+
+        mark_request_serialising(&req, align);
+        wait_serialising_requests(&req);
+
+        head_buf = qemu_blockalign(bs, align);
+        head_iov = (struct iovec) {
+            .iov_base   = head_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+                                  align, &head_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        QEMUIOVector tail_qiov;
+        struct iovec tail_iov;
+        size_t tail_bytes;
+        bool waited;
+
+        mark_request_serialising(&req, align);
+        waited = wait_serialising_requests(&req);
+        assert(!waited || !use_local_qiov);
+
+        tail_buf = qemu_blockalign(bs, align);
+        tail_iov = (struct iovec) {
+            .iov_base   = tail_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
+                                  align, &tail_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+
+        tail_bytes = (offset + bytes) & (align - 1);
+        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    if (use_local_qiov) {
+        /* Local buffer may have non-zero data. */
+        flags &= ~BDRV_REQ_ZERO_WRITE;
+    }
+    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+                               use_local_qiov ? &local_qiov : qiov,
+                               flags);
+
+fail:
+    tracked_request_end(&req);
+
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+    }
+    qemu_vfree(head_buf);
+    qemu_vfree(tail_buf);
+
+    return ret;
+}
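
Stripped of the block-layer machinery, the RMW cycle above is: read the surrounding aligned span, splice in the caller's bytes, write the span back. A self-contained toy version against an in-memory "device" (all names hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN 8                      /* toy request_alignment */

static uint8_t disk[32];             /* stands in for the aligned-only device */

/* service an unaligned write with a read-modify-write cycle */
static void rmw_write(size_t off, const uint8_t *src, size_t len)
{
    size_t lo = off & ~(size_t)(ALIGN - 1);                     /* round down */
    size_t hi = (off + len + ALIGN - 1) & ~(size_t)(ALIGN - 1); /* round up */
    uint8_t block[sizeof(disk)];

    memcpy(block, disk + lo, hi - lo);     /* read the aligned span */
    memcpy(block + (off - lo), src, len);  /* modify with caller's data */
    memcpy(disk + lo, block, hi - lo);     /* write the span back */
}

int main(void)
{
    rmw_write(3, (const uint8_t *)"abc", 3);
    printf("%.3s\n", (const char *)&disk[3]); /* abc */
    return 0;
}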
+
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
+                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+                                      int64_t sector_num, int nb_sectors,
+                                      BdrvRequestFlags flags)
+{
+    int ret;
+
+    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
+
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
+        flags &= ~BDRV_REQ_MAY_UNMAP;
+    }
+    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
+                            nb_sectors << BDRV_SECTOR_BITS)) {
+        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+                                BDRV_REQ_ZERO_WRITE | flags);
+    } else {
+        uint8_t *buf;
+        QEMUIOVector local_qiov;
+        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
+
+        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
+        memset(buf, 0, bytes);
+        qemu_iovec_init(&local_qiov, 1);
+        qemu_iovec_add(&local_qiov, buf, bytes);
+
+        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
+                                BDRV_REQ_ZERO_WRITE | flags);
+        qemu_vfree(buf);
+    }
+    return ret;
+}
+
+int bdrv_flush_all(void)
+{
+    BlockDriverState *bs = NULL;
+    int result = 0;
+
+    while ((bs = bdrv_next(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        int ret;
+
+        aio_context_acquire(aio_context);
+        ret = bdrv_flush(bs);
+        if (ret < 0 && !result) {
+            result = ret;
+        }
+        aio_context_release(aio_context);
+    }
+
+    return result;
+}
+
+typedef struct BdrvCoGetBlockStatusData {
+    BlockDriverState *bs;
+    BlockDriverState *base;
+    int64_t sector_num;
+    int nb_sectors;
+    int *pnum;
+    int64_t ret;
+    bool done;
+} BdrvCoGetBlockStatusData;
+
+/*
+ * Returns the allocation status of the specified sectors.
+ * Drivers not implementing the functionality are assumed to not support
+ * backing files, hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+                                                     int64_t sector_num,
+                                                     int nb_sectors, int *pnum)
+{
+    int64_t total_sectors;
+    int64_t n;
+    int64_t ret, ret2;
+
+    total_sectors = bdrv_nb_sectors(bs);
+    if (total_sectors < 0) {
+        return total_sectors;
+    }
+
+    if (sector_num >= total_sectors) {
+        *pnum = 0;
+        return 0;
+    }
+
+    n = total_sectors - sector_num;
+    if (n < nb_sectors) {
+        nb_sectors = n;
+    }
+
+    if (!bs->drv->bdrv_co_get_block_status) {
+        *pnum = nb_sectors;
+        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
+        if (bs->drv->protocol_name) {
+            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+        }
+        return ret;
+    }
+
+    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+    if (ret < 0) {
+        *pnum = 0;
+        return ret;
+    }
+
+    if (ret & BDRV_BLOCK_RAW) {
+        assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+                                     *pnum, pnum);
+    }
+
+    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
+        ret |= BDRV_BLOCK_ALLOCATED;
+    }
+
+    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
+        if (bdrv_unallocated_blocks_are_zero(bs)) {
+            ret |= BDRV_BLOCK_ZERO;
+        } else if (bs->backing_hd) {
+            BlockDriverState *bs2 = bs->backing_hd;
+            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
+            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
+                ret |= BDRV_BLOCK_ZERO;
+            }
+        }
+    }
+
+    if (bs->file &&
+        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
+        (ret & BDRV_BLOCK_OFFSET_VALID)) {
+        int file_pnum;
+
+        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+                                        *pnum, &file_pnum);
+        if (ret2 >= 0) {
+            /* Ignore errors.  This is just providing extra information; it
+             * is useful but not necessary.
+             */
+            if (!file_pnum) {
+                /* !file_pnum indicates an offset at or beyond the EOF; it is
+                 * perfectly valid for the format block driver to point to such
+                 * offsets, so catch it and mark everything as zero */
+                ret |= BDRV_BLOCK_ZERO;
+            } else {
+                /* Limit request to the range reported by the protocol driver */
+                *pnum = file_pnum;
+                ret |= (ret2 & BDRV_BLOCK_ZERO);
+            }
+        }
+    }
+
+    return ret;
+}
+
+/* Coroutine wrapper for bdrv_get_block_status() */
+static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
+{
+    BdrvCoGetBlockStatusData *data = opaque;
+    BlockDriverState *bs = data->bs;
+
+    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
+                                         data->pnum);
+    data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_get_block_status().
+ *
+ * See bdrv_co_get_block_status() for details.
+ */
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors, int *pnum)
+{
+    Coroutine *co;
+    BdrvCoGetBlockStatusData data = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .pnum = pnum,
+        .done = false,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_get_block_status_co_entry(&data);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
+        qemu_coroutine_enter(co, &data);
+        while (!data.done) {
+            aio_poll(aio_context, true);
+        }
+    }
+    return data.ret;
+}
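
A sketch of how a caller might decode the returned flags (assumes the QEMU block-layer headers; not a standalone program):

/* Sketch only: flag semantics as used by bdrv_co_get_block_status() above. */
static void describe_run(BlockDriverState *bs, int64_t sector_num, int n)
{
    int pnum;
    int64_t ret = bdrv_get_block_status(bs, sector_num, n, &pnum);

    if (ret < 0) {
        return;                                   /* query failed */
    }
    if (ret & BDRV_BLOCK_ZERO) {
        /* the next pnum sectors read as zeroes */
    }
    if (ret & BDRV_BLOCK_OFFSET_VALID) {
        /* sector in bs->file where the data lives, as used above */
        int64_t host_sector = ret >> BDRV_SECTOR_BITS;
        (void)host_sector;
    }
}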
+
+int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                                   int nb_sectors, int *pnum)
+{
+    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+    if (ret < 0) {
+        return ret;
+    }
+    return !!(ret & BDRV_BLOCK_ALLOCATED);
+}
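
And a usage sketch for bdrv_is_allocated() itself: pnum always advances by a whole run, so an image can be scanned without rechecking sectors (again assuming the QEMU block-layer headers, and coroutine context since this is a coroutine_fn):

/* Sketch only: count allocated sectors by walking allocation runs. */
static int64_t coroutine_fn count_allocated(BlockDriverState *bs,
                                            int64_t total_sectors)
{
    int64_t sector = 0, allocated = 0;

    while (sector < total_sectors) {
        int pnum;
        int n = MIN(total_sectors - sector, BDRV_REQUEST_MAX_SECTORS);
        int ret = bdrv_is_allocated(bs, sector, n, &pnum);

        if (ret < 0) {
            return ret;            /* propagate the error */
        }
        if (ret) {
            allocated += pnum;     /* this run is allocated */
        }
        sector += pnum;            /* pnum covers the run either way */
    }
    return allocated;
}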
+
+/*
+ * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in any image between
+ * BASE and TOP (inclusive).  BASE can be NULL to check if the given
+ * sector is allocated in any image of the chain.  Return false otherwise.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ *  the specified sector) that are known to be in the same
+ *  allocated/unallocated state.
+ *
+ */
+int bdrv_is_allocated_above(BlockDriverState *top,
+                            BlockDriverState *base,
+                            int64_t sector_num,
+                            int nb_sectors, int *pnum)
+{
+    BlockDriverState *intermediate;
+    int ret, n = nb_sectors;
+
+    intermediate = top;
+    while (intermediate && intermediate != base) {
+        int pnum_inter;
+        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
+                                &pnum_inter);
+        if (ret < 0) {
+            return ret;
+        } else if (ret) {
+            *pnum = pnum_inter;
+            return 1;
+        }
+
+        /*
+         * [sector_num, nb_sectors] is unallocated on top but intermediate
+         * might have
+         *
+         * [sector_num+x, nb_sectors] allocated.
+         */
+        if (n > pnum_inter &&
+            (intermediate == top ||
+             sector_num + pnum_inter < intermediate->total_sectors)) {
+            n = pnum_inter;
+        }
+
+        intermediate = intermediate->backing_hd;
+    }
+
+    *pnum = n;
+    return 0;
+}
+
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                          const uint8_t *buf, int nb_sectors)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (!drv->bdrv_write_compressed) {
+        return -ENOTSUP;
+    }
+    ret = bdrv_check_request(bs, sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    }
+
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
+    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+}
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+                      int64_t pos, int size)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base   = (void *) buf,
+        .iov_len    = size,
+    };
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    return bdrv_writev_vmstate(bs, &qiov, pos);
+}
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    } else if (drv->bdrv_save_vmstate) {
+        return drv->bdrv_save_vmstate(bs, qiov, pos);
+    } else if (bs->file) {
+        return bdrv_writev_vmstate(bs->file, qiov, pos);
+    }
+
+    return -ENOTSUP;
+}
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                      int64_t pos, int size)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv)
+        return -ENOMEDIUM;
+    if (drv->bdrv_load_vmstate)
+        return drv->bdrv_load_vmstate(bs, buf, pos, size);
+    if (bs->file)
+        return bdrv_load_vmstate(bs->file, buf, pos, size);
+    return -ENOTSUP;
+}
+
+/**************************************************************/
+/* async I/Os */
+
+BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                           QEMUIOVector *qiov, int nb_sectors,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, false);
+}
+
+BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                            QEMUIOVector *qiov, int nb_sectors,
+                            BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, true);
+}
+
+BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
+                                 BDRV_REQ_ZERO_WRITE | flags,
+                                 cb, opaque, true);
+}
+
+
+typedef struct MultiwriteCB {
+    int error;
+    int num_requests;
+    int num_callbacks;
+    struct {
+        BlockCompletionFunc *cb;
+        void *opaque;
+        QEMUIOVector *free_qiov;
+    } callbacks[];
+} MultiwriteCB;
+
+static void multiwrite_user_cb(MultiwriteCB *mcb)
+{
+    int i;
+
+    for (i = 0; i < mcb->num_callbacks; i++) {
+        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
+        if (mcb->callbacks[i].free_qiov) {
+            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
+        }
+        g_free(mcb->callbacks[i].free_qiov);
+    }
+}
+
+static void multiwrite_cb(void *opaque, int ret)
+{
+    MultiwriteCB *mcb = opaque;
+
+    trace_multiwrite_cb(mcb, ret);
+
+    if (ret < 0 && !mcb->error) {
+        mcb->error = ret;
+    }
+
+    mcb->num_requests--;
+    if (mcb->num_requests == 0) {
+        multiwrite_user_cb(mcb);
+        g_free(mcb);
+    }
+}
+
+static int multiwrite_req_compare(const void *a, const void *b)
+{
+    const BlockRequest *req1 = a, *req2 = b;
+
+    /*
+     * Note that we can't simply subtract req2->sector from req1->sector
+     * here as that could overflow the return value.
+     */
+    if (req1->sector > req2->sector) {
+        return 1;
+    } else if (req1->sector < req2->sector) {
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Takes a bunch of requests and tries to merge them. Returns the number of
+ * requests that remain after merging.
+ */
+static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
+    int num_reqs, MultiwriteCB *mcb)
+{
+    int i, outidx;
+
+    // Sort requests by start sector
+    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
+
+    // Check if adjacent requests touch the same clusters. If so, combine them,
+    // filling up gaps with zero sectors.
+    outidx = 0;
+    for (i = 1; i < num_reqs; i++) {
+        int merge = 0;
+        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
+
+        // Handle exactly sequential writes and overlapping writes.
+        if (reqs[i].sector <= oldreq_last) {
+            merge = 1;
+        }
+
+        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+            merge = 0;
+        }
+
+        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
+            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
+            merge = 0;
+        }
+
+        if (merge) {
+            size_t size;
+            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
+            qemu_iovec_init(qiov,
+                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
+
+            // Add the first request to the merged one. If the requests are
+            // overlapping, drop the last sectors of the first request.
+            size = (reqs[i].sector - reqs[outidx].sector) << 9;
+            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
+
+            // We shouldn't need to add any zeros between the two requests
+            assert(reqs[i].sector <= oldreq_last);
+
+            // Add the second request
+            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
+
+            // Add tail of first request, if necessary
+            if (qiov->size < reqs[outidx].qiov->size) {
+                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
+                                  reqs[outidx].qiov->size - qiov->size);
+            }
+
+            reqs[outidx].nb_sectors = qiov->size >> 9;
+            reqs[outidx].qiov = qiov;
+
+            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
+        } else {
+            outidx++;
+            reqs[outidx].sector     = reqs[i].sector;
+            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
+            reqs[outidx].qiov       = reqs[i].qiov;
+        }
+    }
+
+    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+
+    return outidx + 1;
+}
+
+/*
+ * Submit multiple AIO write requests at once.
+ *
+ * On success, the function returns 0 and all requests in the reqs array have
+ * been submitted. On error, the function returns -1 and any number of the
+ * requests may or may not have been submitted yet. In particular, this means
+ * that the callback will be called for some of the requests and not for
+ * others. The caller must check the error field of each BlockRequest to know
+ * which callbacks to wait for (if error != 0, no callback will be called for
+ * that request).
+ *
+ * The implementation may modify the contents of the reqs array, e.g. to merge
+ * requests. However, the fields opaque and error are left unmodified as they
+ * are used to signal failure for a single request to the caller.
+ */
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+{
+    MultiwriteCB *mcb;
+    int i;
+
+    /* don't submit writes if we don't have a medium */
+    if (bs->drv == NULL) {
+        for (i = 0; i < num_reqs; i++) {
+            reqs[i].error = -ENOMEDIUM;
+        }
+        return -1;
+    }
+
+    if (num_reqs == 0) {
+        return 0;
+    }
+
+    // Create MultiwriteCB structure
+    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
+    mcb->num_requests = 0;
+    mcb->num_callbacks = num_reqs;
+
+    for (i = 0; i < num_reqs; i++) {
+        mcb->callbacks[i].cb = reqs[i].cb;
+        mcb->callbacks[i].opaque = reqs[i].opaque;
+    }
+
+    // Check for mergable requests
+    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+
+    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+
+    /* Run the aio requests. */
+    mcb->num_requests = num_reqs;
+    for (i = 0; i < num_reqs; i++) {
+        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
+                              reqs[i].nb_sectors, reqs[i].flags,
+                              multiwrite_cb, mcb,
+                              true);
+    }
+
+    return 0;
+}
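
A hypothetical caller following the contract above; the two sequential requests here would be combined by multiwrite_merge() (sketch only, assumes the QEMU block-layer headers and prepared QEMUIOVectors):

static void submit_batch(BlockDriverState *bs, QEMUIOVector *qiov1,
                         QEMUIOVector *qiov2, BlockCompletionFunc *cb,
                         void *opaque)
{
    BlockRequest reqs[2] = {
        { .sector = 0, .nb_sectors = 8, .qiov = qiov1,
          .cb = cb, .opaque = opaque },
        { .sector = 8, .nb_sectors = 8, .qiov = qiov2, /* sequential: merges */
          .cb = cb, .opaque = opaque },
    };

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        /* Only requests with reqs[i].error == 0 will still get a callback;
         * the others failed up front. */
    }
}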
+
+void bdrv_aio_cancel(BlockAIOCB *acb)
+{
+    qemu_aio_ref(acb);
+    bdrv_aio_cancel_async(acb);
+    while (acb->refcnt > 1) {
+        if (acb->aiocb_info->get_aio_context) {
+            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
+        } else if (acb->bs) {
+            aio_poll(bdrv_get_aio_context(acb->bs), true);
+        } else {
+            abort();
+        }
+    }
+    qemu_aio_unref(acb);
+}
+
+/* Async version of aio cancel. The caller is not blocked if the acb implements
+ * cancel_async; otherwise we do nothing and let the request complete normally.
+ * In either case the completion callback must be called. */
+void bdrv_aio_cancel_async(BlockAIOCB *acb)
+{
+    if (acb->aiocb_info->cancel_async) {
+        acb->aiocb_info->cancel_async(acb);
+    }
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+typedef struct BlockAIOCBSync {
+    BlockAIOCB common;
+    QEMUBH *bh;
+    int ret;
+    /* vector translation state */
+    QEMUIOVector *qiov;
+    uint8_t *bounce;
+    int is_write;
+} BlockAIOCBSync;
+
+static const AIOCBInfo bdrv_em_aiocb_info = {
+    .aiocb_size         = sizeof(BlockAIOCBSync),
+};
+
+static void bdrv_aio_bh_cb(void *opaque)
+{
+    BlockAIOCBSync *acb = opaque;
+
+    if (!acb->is_write && acb->ret >= 0) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    }
+    qemu_vfree(acb->bounce);
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qemu_aio_unref(acb);
+}
+
+static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                      int64_t sector_num,
+                                      QEMUIOVector *qiov,
+                                      int nb_sectors,
+                                      BlockCompletionFunc *cb,
+                                      void *opaque,
+                                      int is_write)
+
+{
+    BlockAIOCBSync *acb;
+
+    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
+    acb->is_write = is_write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_try_blockalign(bs, qiov->size);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
+
+    if (acb->bounce == NULL) {
+        acb->ret = -ENOMEM;
+    } else if (is_write) {
+        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+    } else {
+        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+    }
+
+    qemu_bh_schedule(acb->bh);
+
+    return &acb->common;
+}
+
+static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+
+typedef struct BlockAIOCBCoroutine {
+    BlockAIOCB common;
+    BlockRequest req;
+    bool is_write;
+    bool need_bh;
+    bool *done;
+    QEMUBH* bh;
+} BlockAIOCBCoroutine;
+
+static const AIOCBInfo bdrv_em_co_aiocb_info = {
+    .aiocb_size         = sizeof(BlockAIOCBCoroutine),
+};
+
+static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
+{
+    if (!acb->need_bh) {
+        acb->common.cb(acb->common.opaque, acb->req.error);
+        qemu_aio_unref(acb);
+    }
+}
+
+static void bdrv_co_em_bh(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+
+    assert(!acb->need_bh);
+    qemu_bh_delete(acb->bh);
+    bdrv_co_complete(acb);
+}
+
+static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
+{
+    acb->need_bh = false;
+    if (acb->req.error != -EINPROGRESS) {
+        BlockDriverState *bs = acb->common.bs;
+
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+        qemu_bh_schedule(acb->bh);
+    }
+}
+
+/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
+static void coroutine_fn bdrv_co_do_rw(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    if (!acb->is_write) {
+        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+    } else {
+        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+    }
+
+    bdrv_co_complete(acb);
+}
+
+static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BdrvRequestFlags flags,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque,
+                                         bool is_write)
+{
+    Coroutine *co;
+    BlockAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+    acb->req.sector = sector_num;
+    acb->req.nb_sectors = nb_sectors;
+    acb->req.qiov = qiov;
+    acb->req.flags = flags;
+    acb->is_write = is_write;
+
+    co = qemu_coroutine_create(bdrv_co_do_rw);
+    qemu_coroutine_enter(co, acb);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    acb->req.error = bdrv_co_flush(bs);
+    bdrv_co_complete(acb);
+}
+
+BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_flush(bs, opaque);
+
+    Coroutine *co;
+    BlockAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+
+    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+    qemu_coroutine_enter(co, acb);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+    bdrv_co_complete(acb);
+}
+
+BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    Coroutine *co;
+    BlockAIOCBCoroutine *acb;
+
+    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+    acb->req.sector = sector_num;
+    acb->req.nb_sectors = nb_sectors;
+    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
+    qemu_coroutine_enter(co, acb);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = g_slice_alloc(aiocb_info->aiocb_size);
+    acb->aiocb_info = aiocb_info;
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+    acb->refcnt = 1;
+    return acb;
+}
+
+void qemu_aio_ref(void *p)
+{
+    BlockAIOCB *acb = p;
+    acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
+{
+    BlockAIOCB *acb = p;
+    assert(acb->refcnt > 0);
+    if (--acb->refcnt == 0) {
+        g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+    }
+}
+
+/**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+    Coroutine *coroutine;
+    int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+    CoroutineIOCompletion *co = opaque;
+
+    co->ret = ret;
+    qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *iov,
+                                      bool is_write)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockAIOCB *acb;
+
+    if (is_write) {
+        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+                                       bdrv_co_io_em_complete, &co);
+    } else {
+        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+                                      bdrv_co_io_em_complete, &co);
+    }
+
+    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+
+    return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+{
+    RwCo *rwco = opaque;
+
+    rwco->ret = bdrv_co_flush(rwco->bs);
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+    int ret;
+
+    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+        return 0;
+    }
+
+    /* Write back cached data to the OS even with cache=unsafe */
+    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+    if (bs->drv->bdrv_co_flush_to_os) {
+        ret = bs->drv->bdrv_co_flush_to_os(bs);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* But don't actually force it to the disk with cache=unsafe */
+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        goto flush_parent;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+    if (bs->drv->bdrv_co_flush_to_disk) {
+        ret = bs->drv->bdrv_co_flush_to_disk(bs);
+    } else if (bs->drv->bdrv_aio_flush) {
+        BlockAIOCB *acb;
+        CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+        };
+
+        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+        if (acb == NULL) {
+            ret = -EIO;
+        } else {
+            qemu_coroutine_yield();
+            ret = co.ret;
+        }
+    } else {
+        /*
+         * Some block drivers always operate in either writethrough or unsafe
+         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
+         * know how the server works (because the behaviour is hardcoded or
+         * depends on server-side configuration), so we can't ensure that
+         * everything is safe on disk. Returning an error doesn't work because
+         * that would break guests even if the server operates in writethrough
+         * mode.
+         *
+         * Let's hope the user knows what he's doing.
+         */
+        ret = 0;
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
+     * in the case of cache=unsafe, so there are no useless flushes.
+     */
+flush_parent:
+    return bdrv_co_flush(bs->file);
+}
+
+int bdrv_flush(BlockDriverState *bs)
+{
+    Coroutine *co;
+    RwCo rwco = {
+        .bs = bs,
+        .ret = NOT_DONE,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_flush_co_entry(&rwco);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_flush_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    }
+
+    return rwco.ret;
+}
+
+typedef struct DiscardCo {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    int nb_sectors;
+    int ret;
+} DiscardCo;
+static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+{
+    DiscardCo *rwco = opaque;
+
+    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+}
+
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+                                 int nb_sectors)
+{
+    int max_discard, ret;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_check_request(bs, sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    } else if (bs->read_only) {
+        return -EROFS;
+    }
+
+    bdrv_reset_dirty(bs, sector_num, nb_sectors);
+
+    /* Do nothing if disabled.  */
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
+        return 0;
+    }
+
+    if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
+        return 0;
+    }
+
+    max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
+    while (nb_sectors > 0) {
+        int ret;
+        int num = nb_sectors;
+
+        /* align request */
+        if (bs->bl.discard_alignment &&
+            num >= bs->bl.discard_alignment &&
+            sector_num % bs->bl.discard_alignment) {
+            if (num > bs->bl.discard_alignment) {
+                num = bs->bl.discard_alignment;
+            }
+            num -= sector_num % bs->bl.discard_alignment;
+        }
+
+        /* limit request size */
+        if (num > max_discard) {
+            num = max_discard;
+        }
+
+        if (bs->drv->bdrv_co_discard) {
+            ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+        } else {
+            BlockAIOCB *acb;
+            CoroutineIOCompletion co = {
+                .coroutine = qemu_coroutine_self(),
+            };
+
+            acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
+                                            bdrv_co_io_em_complete, &co);
+            if (acb == NULL) {
+                return -EIO;
+            } else {
+                qemu_coroutine_yield();
+                ret = co.ret;
+            }
+        }
+        if (ret && ret != -ENOTSUP) {
+            return ret;
+        }
+
+        sector_num += num;
+        nb_sectors -= num;
+    }
+    return 0;
+}
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    Coroutine *co;
+    DiscardCo rwco = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .ret = NOT_DONE,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_discard_co_entry(&rwco);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_discard_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    }
+
+    return rwco.ret;
+}
+
+/* needed for generic scsi interface */
+
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (drv && drv->bdrv_ioctl)
+        return drv->bdrv_ioctl(bs, req, buf);
+    return -ENOTSUP;
+}
+
+BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+        unsigned long int req, void *buf,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (drv && drv->bdrv_aio_ioctl)
+        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
+    return NULL;
+}
+
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+    return qemu_memalign(bdrv_opt_mem_align(bs), size);
+}
+
+void *qemu_blockalign0(BlockDriverState *bs, size_t size)
+{
+    return memset(qemu_blockalign(bs, size), 0, size);
+}
+
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+{
+    size_t align = bdrv_opt_mem_align(bs);
+
+    /* Ensure that NULL is never returned on success */
+    assert(align > 0);
+    if (size == 0) {
+        size = align;
+    }
+
+    return qemu_try_memalign(align, size);
+}
+
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
+{
+    void *mem = qemu_try_blockalign(bs, size);
+
+    if (mem) {
+        memset(mem, 0, size);
+    }
+
+    return mem;
+}
+
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+    int i;
+    size_t alignment = bdrv_opt_mem_align(bs);
+
+    for (i = 0; i < qiov->niov; i++) {
+        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
+            return false;
+        }
+        if (qiov->iov[i].iov_len % alignment) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+                                    NotifierWithReturn *notifier)
+{
+    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
+}
+
+void bdrv_io_plug(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_io_plug) {
+        drv->bdrv_io_plug(bs);
+    } else if (bs->file) {
+        bdrv_io_plug(bs->file);
+    }
+}
+
+void bdrv_io_unplug(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_io_unplug) {
+        drv->bdrv_io_unplug(bs);
+    } else if (bs->file) {
+        bdrv_io_unplug(bs->file);
+    }
+}
+
+void bdrv_flush_io_queue(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_flush_io_queue) {
+        drv->bdrv_flush_io_queue(bs);
+    } else if (bs->file) {
+        bdrv_flush_io_queue(bs->file);
+    }
+}

+ 44 - 20
block/iscsi.c

@@ -2,7 +2,7 @@
  * QEMU Block driver for iSCSI images
  * QEMU Block driver for iSCSI images
  *
  *
  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
  *
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
  * of this software and associated documentation files (the "Software"), to deal
@@ -57,9 +57,6 @@ typedef struct IscsiLun {
     int events;
     int events;
     QEMUTimer *nop_timer;
     QEMUTimer *nop_timer;
     QEMUTimer *event_timer;
     QEMUTimer *event_timer;
-    uint8_t lbpme;
-    uint8_t lbprz;
-    uint8_t has_write_same;
     struct scsi_inquiry_logical_block_provisioning lbp;
     struct scsi_inquiry_logical_block_provisioning lbp;
     struct scsi_inquiry_block_limits bl;
     struct scsi_inquiry_block_limits bl;
     unsigned char *zeroblock;
     unsigned char *zeroblock;
@@ -67,6 +64,11 @@ typedef struct IscsiLun {
     int cluster_sectors;
     int cluster_sectors;
     bool use_16_for_rw;
     bool use_16_for_rw;
     bool write_protected;
     bool write_protected;
+    bool lbpme;
+    bool lbprz;
+    bool dpofua;
+    bool has_write_same;
+    bool force_next_flush;
 } IscsiLun;
 } IscsiLun;
 
 
 typedef struct IscsiTask {
 typedef struct IscsiTask {
@@ -79,6 +81,7 @@ typedef struct IscsiTask {
     QEMUBH *bh;
     QEMUBH *bh;
     IscsiLun *iscsilun;
     IscsiLun *iscsilun;
     QEMUTimer retry_timer;
     QEMUTimer retry_timer;
+    bool force_next_flush;
 } IscsiTask;
 } IscsiTask;
 
 
 typedef struct IscsiAIOCB {
 typedef struct IscsiAIOCB {
@@ -100,7 +103,7 @@ typedef struct IscsiAIOCB {
 #define NOP_INTERVAL 5000
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
-static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
+static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
 
 
 /* this threshold is a trade-off knob to choose between
 /* this threshold is a trade-off knob to choose between
  * the potential additional overhead of an extra GET_LBA_STATUS request
  * the potential additional overhead of an extra GET_LBA_STATUS request
@@ -183,10 +186,13 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                 iTask->do_retry = 1;
                 iTask->do_retry = 1;
                 goto out;
                 goto out;
             }
             }
-            if (status == SCSI_STATUS_BUSY) {
+            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
+             * in libiscsi 1.10.0. Hardcode this value here to avoid
+             * the need to bump the libiscsi requirement to 1.10.0 */
+            if (status == SCSI_STATUS_BUSY || status == 0x28) {
                 unsigned retry_time =
                 unsigned retry_time =
                     exp_random(iscsi_retry_times[iTask->retries - 1]);
                     exp_random(iscsi_retry_times[iTask->retries - 1]);
-                error_report("iSCSI Busy (retry #%u in %u ms): %s",
+                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
                              iTask->retries, retry_time,
                              iTask->retries, retry_time,
                              iscsi_get_error(iscsi));
                              iscsi_get_error(iscsi));
                 aio_timer_init(iTask->iscsilun->aio_context,
                 aio_timer_init(iTask->iscsilun->aio_context,
@@ -199,6 +205,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
             }
             }
         }
         }
         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
+    } else {
+        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
     }
     }
 
 
 out:
 out:
@@ -369,6 +377,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
     struct IscsiTask iTask;
     struct IscsiTask iTask;
     uint64_t lba;
     uint64_t lba;
     uint32_t num_sectors;
     uint32_t num_sectors;
+    int fua;
 
 
     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
         return -EINVAL;
         return -EINVAL;
@@ -384,15 +393,17 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
     iscsi_co_init_iscsitask(iscsilun, &iTask);
     iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
+    fua = iscsilun->dpofua && !bs->enable_write_cache;
+    iTask.force_next_flush = !fua;
     if (iscsilun->use_16_for_rw) {
         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                         NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, 0, 0, 0,
+                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                         iscsi_co_generic_cb, &iTask);
     } else {
         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                         NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, 0, 0, 0,
+                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                         iscsi_co_generic_cb, &iTask);
     }
     if (iTask.task == NULL) {
@@ -460,7 +471,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
     *pnum = nb_sectors;

     /* LUN does not support logical block provisioning */
-    if (iscsilun->lbpme == 0) {
+    if (!iscsilun->lbpme) {
         goto out;
     }

@@ -620,8 +631,12 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
         return 0;
     }

-    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    if (!iscsilun->force_next_flush) {
+        return 0;
+    }
+    iscsilun->force_next_flush = false;

+    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -917,6 +932,7 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
     }

     iscsi_co_init_iscsitask(iscsilun, &iTask);
+    iTask.force_next_flush = true;
 retry:
     if (use_16_for_ws) {
         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
@@ -1121,8 +1137,8 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
                 } else {
                     iscsilun->block_size = rc16->block_length;
                     iscsilun->num_blocks = rc16->returned_lba + 1;
-                    iscsilun->lbpme = rc16->lbpme;
-                    iscsilun->lbprz = rc16->lbprz;
+                    iscsilun->lbpme = !!rc16->lbpme;
+                    iscsilun->lbprz = !!rc16->lbprz;
                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
                 }
             }
@@ -1253,11 +1269,12 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
                                           iscsi_timed_set_events, iscsilun);
 }

-static bool iscsi_is_write_protected(IscsiLun *iscsilun)
+static void iscsi_modesense_sync(IscsiLun *iscsilun)
 {
     struct scsi_task *task;
     struct scsi_mode_sense *ms = NULL;
-    bool wrprotected = false;
+    iscsilun->write_protected = false;
+    iscsilun->dpofua = false;

     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
                                  1, SCSI_MODESENSE_PC_CURRENT,
@@ -1278,13 +1295,13 @@ static bool iscsi_is_write_protected(IscsiLun *iscsilun)
                     iscsi_get_error(iscsilun->iscsi));
         goto out;
     }
-    wrprotected = ms->device_specific_parameter & 0x80;
+    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
+    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;

 out:
     if (task) {
         scsi_free_scsi_task(task);
     }
-    return wrprotected;
 }

 /*
@@ -1403,7 +1420,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     scsi_free_scsi_task(task);
     task = NULL;

-    iscsilun->write_protected = iscsi_is_write_protected(iscsilun);
+    iscsi_modesense_sync(iscsilun);
+
     /* Check the write protect flag of the LUN if we want to write */
     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
         iscsilun->write_protected) {
@@ -1481,7 +1499,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
-        if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
+        if (iscsilun->lbprz) {
             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
             if (iscsilun->allocationmap == NULL) {
                 ret = -ENOMEM;
@@ -1501,6 +1519,9 @@ out:

     if (ret) {
         if (iscsi != NULL) {
+            if (iscsi_is_logged_in(iscsi)) {
+                iscsi_logout_sync(iscsi);
+            }
             iscsi_destroy_context(iscsi);
         }
         memset(iscsilun, 0, sizeof(IscsiLun));
@@ -1514,6 +1535,9 @@ static void iscsi_close(BlockDriverState *bs)
     struct iscsi_context *iscsi = iscsilun->iscsi;

     iscsi_detach_aio_context(bs);
+    if (iscsi_is_logged_in(iscsi)) {
+        iscsi_logout_sync(iscsi);
+    }
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
     g_free(iscsilun->allocationmap);
@@ -1649,7 +1673,7 @@ out:
 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     IscsiLun *iscsilun = bs->opaque;
-    bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
+    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
     return 0;

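Note on the iscsi changes above: when MODE SENSE reports DPOFUA and the guest runs with its write cache disabled, writes carry the FUA bit and need no separate SYNCHRONIZE CACHE; only writes (and write-same) that may leave data in the target's cache arm force_next_flush, letting iscsi_co_flush return immediately otherwise. A minimal stand-alone sketch of that invariant (simplified; the real driver routes the flag through iTask and its completion callback):

    #include <stdbool.h>

    /* Minimal stand-ins for the driver state this patch touches. */
    struct lun_state {
        bool dpofua;            /* target honours the FUA bit (MODE SENSE) */
        bool force_next_flush;  /* some write may still sit in the cache */
    };

    /* Every write either carries FUA itself or arms one explicit flush. */
    static void note_write(struct lun_state *lun, bool enable_write_cache)
    {
        bool fua = lun->dpofua && !enable_write_cache;
        if (!fua) {
            lun->force_next_flush = true;
        }
    }

    /* The flush path may now be a no-op when nothing is pending. */
    static bool flush_needed(struct lun_state *lun)
    {
        if (!lun->force_next_flush) {
            return false;           /* all prior writes already hit media */
        }
        lun->force_next_flush = false;
        return true;                /* caller issues SYNCHRONIZE CACHE (10) */
    }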
+ 20 - 33
block/mirror.c

@@ -125,11 +125,9 @@ static void mirror_write_complete(void *opaque, int ret)
     MirrorOp *op = opaque;
     MirrorBlockJob *s = op->s;
     if (ret < 0) {
-        BlockDriverState *source = s->common.bs;
         BlockErrorAction action;

-        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
-                              op->nb_sectors);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
         action = mirror_error_action(s, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -143,11 +141,9 @@ static void mirror_read_complete(void *opaque, int ret)
     MirrorOp *op = opaque;
     MirrorBlockJob *s = op->s;
     if (ret < 0) {
-        BlockDriverState *source = s->common.bs;
         BlockErrorAction action;

-        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
-                              op->nb_sectors);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
         action = mirror_error_action(s, true, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -170,10 +166,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)

     s->sector_num = hbitmap_iter_next(&s->hbi);
     if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
+        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
         s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s,
-                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
         assert(s->sector_num >= 0);
     }

@@ -288,8 +283,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         next_sector += sectors_per_chunk;
     }

-    bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num,
-                            nb_sectors);
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);

     /* Copy the dirty cluster.  */
     s->in_flight++;
@@ -446,7 +440,7 @@ static void coroutine_fn mirror_run(void *opaque)

             assert(n > 0);
             if (ret == 1) {
-                bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n);
+                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
                 sector_num = next;
             } else {
                 sector_num += n;
@@ -454,7 +448,7 @@ static void coroutine_fn mirror_run(void *opaque)
         }
     }

-    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
+    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
     last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     for (;;) {
         uint64_t delay_ns = 0;
@@ -466,7 +460,7 @@ static void coroutine_fn mirror_run(void *opaque)
             goto immediate_exit;
         }

-        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         /* s->common.offset contains the number of bytes already processed so
          * far, cnt is the number of dirty sectors remaining and
          * s->sectors_in_flight is the number of sectors currently being
@@ -475,7 +469,7 @@ static void coroutine_fn mirror_run(void *opaque)
                         (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;

         /* Note that even when no rate limit is applied we need to yield
-         * periodically with no pending I/O so that qemu_aio_flush() returns.
+         * periodically with no pending I/O so that bdrv_drain_all() returns.
          * We do so every SLICE_TIME nanoseconds, or when there is an error,
          * or when the source is clean, whichever comes first.
          */
@@ -488,9 +482,6 @@ static void coroutine_fn mirror_run(void *opaque)
                 continue;
             } else if (cnt != 0) {
                 delay_ns = mirror_iteration(s);
-                if (delay_ns == 0) {
-                    continue;
-                }
             }
         }

@@ -516,7 +507,7 @@ static void coroutine_fn mirror_run(void *opaque)

                 should_complete = s->should_complete ||
                     block_job_is_cancelled(&s->common);
-                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
             }
         }

@@ -531,7 +522,7 @@ static void coroutine_fn mirror_run(void *opaque)
              */
             trace_mirror_before_drain(s, cnt);
             bdrv_drain(bs);
-            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         }

         ret = 0;
@@ -634,7 +625,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
     }

     s->should_complete = true;
-    block_job_resume(job);
+    block_job_enter(&s->common);
 }

 static const BlockJobDriver mirror_job_driver = {
@@ -656,7 +647,7 @@ static const BlockJobDriver commit_active_job_driver = {

 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                              const char *replaces,
-                             int64_t speed, int64_t granularity,
+                             int64_t speed, uint32_t granularity,
                              int64_t buf_size,
                              BlockdevOnError on_source_error,
                              BlockdevOnError on_target_error,
@@ -668,15 +659,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     MirrorBlockJob *s;

     if (granularity == 0) {
-        /* Choose the default granularity based on the target file's cluster
-         * size, clamped between 4k and 64k.  */
-        BlockDriverInfo bdi;
-        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
-            granularity = MAX(4096, bdi.cluster_size);
-            granularity = MIN(65536, granularity);
-        } else {
-            granularity = 65536;
-        }
+        granularity = bdrv_get_default_bitmap_granularity(target);
     }

     assert ((granularity & (granularity - 1)) == 0);
@@ -703,7 +686,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     s->granularity = granularity;
     s->buf_size = MAX(buf_size, granularity);

-    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
+    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
     if (!s->dirty_bitmap) {
         return;
     }
@@ -717,7 +700,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,

 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   const char *replaces,
-                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  int64_t speed, uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb,
@@ -726,6 +709,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     bool is_none_mode;
     BlockDriverState *base;

+    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
+        return;
+    }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
     mirror_start_job(bs, target, replaces,

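The default-granularity computation removed from mirror_start_job() above moves behind bdrv_get_default_bitmap_granularity() (added elsewhere in this series); judging from the deleted lines, the policy is presumably unchanged. A stand-alone sketch of that policy:

    #include <stdint.h>

    /* Sketch of the clamp the deleted code implemented: use the target's
     * cluster size, clamped to [4 KiB, 64 KiB], defaulting to 64 KiB when
     * the cluster size is unknown or zero. */
    static uint32_t default_bitmap_granularity(int64_t cluster_size)
    {
        if (cluster_size <= 0) {
            return 65536;
        }
        if (cluster_size < 4096) {
            return 4096;
        }
        if (cluster_size > 65536) {
            return 65536;
        }
        return (uint32_t)cluster_size;
    }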
+ 60 - 6
block/null.c

@@ -12,8 +12,11 @@

 #include "block/block_int.h"

+#define NULL_OPT_LATENCY "latency-ns"
+
 typedef struct {
     int64_t length;
+    int64_t latency_ns;
 } BDRVNullState;

 static QemuOptsList runtime_opts = {
@@ -30,6 +33,12 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_SIZE,
             .help = "size of the null block",
         },
+        {
+            .name = NULL_OPT_LATENCY,
+            .type = QEMU_OPT_NUMBER,
+            .help = "nanoseconds (approximated) to wait "
+                    "before completing request",
+        },
         { /* end of list */ }
     },
 };
@@ -39,13 +48,20 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
 {
     QemuOpts *opts;
     BDRVNullState *s = bs->opaque;
+    int ret = 0;

     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &error_abort);
     s->length =
         qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
+    s->latency_ns =
+        qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0);
+    if (s->latency_ns < 0) {
+        error_setg(errp, "latency-ns is invalid");
+        ret = -EINVAL;
+    }
     qemu_opts_del(opts);
-    return 0;
+    return ret;
 }

 static void null_close(BlockDriverState *bs)
@@ -58,28 +74,40 @@ static int64_t null_getlength(BlockDriverState *bs)
     return s->length;
 }

+static coroutine_fn int null_co_common(BlockDriverState *bs)
+{
+    BDRVNullState *s = bs->opaque;
+
+    if (s->latency_ns) {
+        co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
+                        s->latency_ns);
+    }
+    return 0;
+}
+
 static coroutine_fn int null_co_readv(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors,
                                       QEMUIOVector *qiov)
 {
-    return 0;
+    return null_co_common(bs);
 }

 static coroutine_fn int null_co_writev(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        QEMUIOVector *qiov)
 {
-    return 0;
+    return null_co_common(bs);
 }

 static coroutine_fn int null_co_flush(BlockDriverState *bs)
 {
-    return 0;
+    return null_co_common(bs);
 }

 typedef struct {
     BlockAIOCB common;
     QEMUBH *bh;
+    QEMUTimer timer;
 } NullAIOCB;

 static const AIOCBInfo null_aiocb_info = {
@@ -94,15 +122,33 @@ static void null_bh_cb(void *opaque)
     qemu_aio_unref(acb);
 }

+static void null_timer_cb(void *opaque)
+{
+    NullAIOCB *acb = opaque;
+    acb->common.cb(acb->common.opaque, 0);
+    timer_deinit(&acb->timer);
+    qemu_aio_unref(acb);
+}
+
 static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
                                           BlockCompletionFunc *cb,
                                           void *opaque)
 {
     NullAIOCB *acb;
+    BDRVNullState *s = bs->opaque;

     acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
+    /* Only emulate latency after vcpu is running. */
+    if (s->latency_ns) {
+        aio_timer_init(bdrv_get_aio_context(bs), &acb->timer,
+                       QEMU_CLOCK_REALTIME, SCALE_NS,
+                       null_timer_cb, acb);
+        timer_mod_ns(&acb->timer,
+                     qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
+    } else {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
+        qemu_bh_schedule(acb->bh);
+    }
     return &acb->common;
 }

@@ -131,6 +177,12 @@ static BlockAIOCB *null_aio_flush(BlockDriverState *bs,
     return null_aio_common(bs, cb, opaque);
 }

+static int null_reopen_prepare(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static BlockDriver bdrv_null_co = {
     .format_name            = "null-co",
     .protocol_name          = "null-co",
@@ -143,6 +195,7 @@ static BlockDriver bdrv_null_co = {
     .bdrv_co_readv          = null_co_readv,
     .bdrv_co_writev         = null_co_writev,
     .bdrv_co_flush_to_disk  = null_co_flush,
+    .bdrv_reopen_prepare    = null_reopen_prepare,
 };

 static BlockDriver bdrv_null_aio = {
@@ -157,6 +210,7 @@ static BlockDriver bdrv_null_aio = {
     .bdrv_aio_readv         = null_aio_readv,
     .bdrv_aio_writev        = null_aio_writev,
     .bdrv_aio_flush         = null_aio_flush,
+    .bdrv_reopen_prepare    = null_reopen_prepare,
 };

 static void bdrv_null_init(void)

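With the new option, the null driver can approximate a device with a fixed completion latency: whenever latency-ns is non-zero, requests complete via a one-shot QEMU_CLOCK_REALTIME timer (or a coroutine sleep) instead of an immediate bottom half. As a hedged usage sketch, something along the lines of -drive driver=null-aio,size=1G,latency-ns=100000,if=none,id=nulldrv should exercise it; the exact command-line spelling is an assumption here, not part of this patch.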
+ 25 - 21
block/qapi.c

@@ -31,8 +31,10 @@
 #include "qapi/qmp/types.h"
 #include "qapi/qmp/types.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/block-backend.h"
 
 
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
 {
 {
+    ImageInfo **p_image_info;
+    BlockDriverState *bs0;
     BlockDeviceInfo *info = g_malloc0(sizeof(*info));
     BlockDeviceInfo *info = g_malloc0(sizeof(*info));
 
 
     info->file                   = g_strdup(bs->filename);
     info->file                   = g_strdup(bs->filename);
@@ -92,6 +94,25 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
 
     info->write_threshold = bdrv_write_threshold_get(bs);
     info->write_threshold = bdrv_write_threshold_get(bs);
 
 
+    bs0 = bs;
+    p_image_info = &info->image;
+    while (1) {
+        Error *local_err = NULL;
+        bdrv_query_image_info(bs0, p_image_info, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            qapi_free_BlockDeviceInfo(info);
+            return NULL;
+        }
+        if (bs0->drv && bs0->backing_hd) {
+            bs0 = bs0->backing_hd;
+            (*p_image_info)->has_backing_image = true;
+            p_image_info = &((*p_image_info)->backing_image);
+        } else {
+            break;
+        }
+    }
+
     return info;
     return info;
 }
 }
 
 
@@ -264,9 +285,6 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 {
 {
     BlockInfo *info = g_malloc0(sizeof(*info));
     BlockInfo *info = g_malloc0(sizeof(*info));
     BlockDriverState *bs = blk_bs(blk);
     BlockDriverState *bs = blk_bs(blk);
-    BlockDriverState *bs0;
-    ImageInfo **p_image_info;
-    Error *local_err = NULL;
     info->device = g_strdup(blk_name(blk));
     info->device = g_strdup(blk_name(blk));
     info->type = g_strdup("unknown");
     info->type = g_strdup("unknown");
     info->locked = blk_dev_is_medium_locked(blk);
     info->locked = blk_dev_is_medium_locked(blk);
@@ -289,23 +307,9 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 
 
     if (bs->drv) {
     if (bs->drv) {
         info->has_inserted = true;
         info->has_inserted = true;
-        info->inserted = bdrv_block_device_info(bs);
-
-        bs0 = bs;
-        p_image_info = &info->inserted->image;
-        while (1) {
-            bdrv_query_image_info(bs0, p_image_info, &local_err);
-            if (local_err) {
-                error_propagate(errp, local_err);
-                goto err;
-            }
-            if (bs0->drv && bs0->backing_hd) {
-                bs0 = bs0->backing_hd;
-                (*p_image_info)->has_backing_image = true;
-                p_image_info = &((*p_image_info)->backing_image);
-            } else {
-                break;
-            }
+        info->inserted = bdrv_block_device_info(bs, errp);
+        if (info->inserted == NULL) {
+            goto err;
         }
         }
     }
     }
 
 

+ 4 - 4
block/qcow.c

@@ -124,7 +124,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
         snprintf(version, sizeof(version), "QCOW version %" PRIu32,
                  header.version);
         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bdrv_get_device_name(bs), "qcow", version);
+                  bdrv_get_device_or_node_name(bs), "qcow", version);
         ret = -ENOTSUP;
         goto fail;
     }
@@ -229,9 +229,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     }

     /* Disable migration when qcow images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "qcow", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);

     qemu_co_mutex_init(&s->lock);

+ 1 - 1
block/qcow2-refcount.c

@@ -2450,7 +2450,7 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
     if (ret < 0) {
         return ret;
     } else if (ret > 0) {
-        int metadata_ol_bitnr = ffs(ret) - 1;
+        int metadata_ol_bitnr = ctz32(ret);
         assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);

         qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "

+ 2 - 4
block/qcow2-snapshot.c

@@ -351,10 +351,8 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)

     memset(sn, 0, sizeof(*sn));

-    /* Generate an ID if it wasn't passed */
-    if (sn_info->id_str[0] == '\0') {
-        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
-    }
+    /* Generate an ID */
+    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));

     /* Check that the ID is unique */
     if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {

+ 9 - 5
block/qcow2.c

@@ -208,7 +208,7 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
     va_end(ap);

     error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-              bdrv_get_device_name(bs), "qcow2", msg);
+              bdrv_get_device_or_node_name(bs), "qcow2", msg);
 }

 static void report_unsupported_feature(BlockDriverState *bs,
@@ -1802,7 +1802,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
 {
     /* Calculate cluster_bits */
     int cluster_bits;
-    cluster_bits = ffs(cluster_size) - 1;
+    cluster_bits = ctz32(cluster_size);
     if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
         (1 << cluster_bits) != cluster_size)
     {
@@ -2110,7 +2110,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
         goto finish;
     }

-    refcount_order = ffs(refcount_bits) - 1;
+    refcount_order = ctz32(refcount_bits);

     ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
                         cluster_size, prealloc, opts, version, refcount_order,
@@ -2824,6 +2824,7 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
                              int64_t size, const char *message_format, ...)
 {
     BDRVQcowState *s = bs->opaque;
+    const char *node_name;
     char *message;
     va_list ap;

@@ -2847,8 +2848,11 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
                 "corruption events will be suppressed\n", message);
     }

-    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message,
-                                          offset >= 0, offset, size >= 0, size,
+    node_name = bdrv_get_node_name(bs);
+    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
+                                          *node_name != '\0', node_name,
+                                          message, offset >= 0, offset,
+                                          size >= 0, size,
                                           fatal, &error_abort);
     g_free(message);


+ 3 - 3
block/qed.c

@@ -408,7 +408,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         snprintf(buf, sizeof(buf), "%" PRIx64,
             s->header.features & ~QED_FEATURE_MASK);
         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bdrv_get_device_name(bs), "QED", buf);
+            bdrv_get_device_or_node_name(bs), "QED", buf);
         return -ENOTSUP;
     }
     if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -436,9 +436,9 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,

     s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                       sizeof(uint64_t);
-    s->l2_shift = ffs(s->header.cluster_size) - 1;
+    s->l2_shift = ctz32(s->header.cluster_size);
     s->l2_mask = s->table_nelems - 1;
-    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+    s->l1_shift = s->l2_shift + ctz32(s->table_nelems);

     /* Header size calculation must not overflow uint32_t */
     if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {

+ 1 - 4
block/quorum.c

@@ -226,10 +226,7 @@ static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)

 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_name(acb->common.bs)[0] ?
-                            bdrv_get_device_name(acb->common.bs) :
-                            acb->common.bs->node_name;
-
+    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
     qapi_event_send_quorum_failure(reference, acb->sector_num,
                                    acb->nb_sectors, &error_abort);
 }

+ 1 - 1
block/rbd.c

@@ -325,7 +325,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
             error_setg(errp, "obj size too small");
             error_setg(errp, "obj size too small");
             return -EINVAL;
             return -EINVAL;
         }
         }
-        obj_order = ffs(objsize) - 1;
+        obj_order = ctz32(objsize);
     }
     }
 
 
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);

+ 1 - 1
block/sheepdog.c

@@ -1716,7 +1716,7 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
         if ((object_size - 1) & object_size) {    /* not a power of 2? */
             return -EINVAL;
         }
-        obj_order = ffs(object_size) - 1;
+        obj_order = ctz32(object_size);
         if (obj_order < 20 || obj_order > 31) {
             return -EINVAL;
         }

+ 6 - 6
block/snapshot.c

@@ -246,9 +246,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
     if (bs->file) {
         return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
     }
-    error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              drv->format_name, bdrv_get_device_name(bs),
-              "internal snapshot deletion");
+    error_setg(errp, "Block format '%s' used by device '%s' "
+               "does not support internal snapshot deletion",
+               drv->format_name, bdrv_get_device_name(bs));
     return -ENOTSUP;
 }

@@ -329,9 +329,9 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
     if (drv->bdrv_snapshot_load_tmp) {
         return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
     }
-    error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              drv->format_name, bdrv_get_device_name(bs),
-              "temporarily load internal snapshot");
+    error_setg(errp, "Block format '%s' used by device '%s' "
+               "does not support temporarily loading internal snapshots",
+               drv->format_name, bdrv_get_device_name(bs));
     return -ENOTSUP;
 }


+ 3 - 3
block/vdi.c

@@ -502,9 +502,9 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
     }

     /* Disable migration when vdi images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vdi", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);

     qemu_co_mutex_init(&s->write_lock);

+ 5 - 5
block/vhdx.c

@@ -1002,9 +1002,9 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     /* TODO: differencing files */

     /* Disable migration when VHDX images are used */
-    error_set(&s->migration_blocker,
-            QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-            "vhdx", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);

     return 0;
@@ -1269,7 +1269,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                         iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
                         memset(iov1.iov_base, 0, iov1.iov_len);
                         qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
-                                              sinfo.block_offset);
+                                              iov1.iov_len);
                         sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
                     }

@@ -1285,7 +1285,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                         iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
                         memset(iov2.iov_base, 0, iov2.iov_len);
                         qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
-                                              sinfo.block_offset);
+                                              iov2.iov_len);
                         sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
                     }
                 }

+ 5 - 5
block/vmdk.c

@@ -523,7 +523,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
     }
     ret = vmdk_add_extent(bs, file, false,
                           le32_to_cpu(header.disk_sectors),
-                          le32_to_cpu(header.l1dir_offset) << 9,
+                          (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
                           0,
                           le32_to_cpu(header.l1dir_size),
                           4096,
@@ -669,7 +669,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
         snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
                  le32_to_cpu(header.version));
         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bdrv_get_device_name(bs), "vmdk", buf);
+                  bdrv_get_device_or_node_name(bs), "vmdk", buf);
         return -ENOTSUP;
     } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
         /* VMware KB 2064959 explains that version 3 added support for
@@ -962,9 +962,9 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);

     /* Disable migration when VMDK images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vmdk", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);
     g_free(buf);
     return 0;

+ 3 - 3
block/vpc.c

@@ -318,9 +318,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);

     /* Disable migration when VHD images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vpc", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);

     return 0;

+ 4 - 3
block/vvfat.c

@@ -1180,9 +1180,10 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,

     /* Disable migration when vvfat is used rw */
     if (s->qcow) {
-        error_set(&s->migration_blocker,
-                  QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  "vvfat (rw)", bdrv_get_device_name(bs), "live migration");
+        error_setg(&s->migration_blocker,
+                   "The vvfat (rw) format used by node '%s' "
+                   "does not support live migration",
+                   bdrv_get_device_or_node_name(bs));
         migrate_add_blocker(s->migration_blocker);
     }


+ 184 - 11
blockdev.c

@@ -1164,6 +1164,68 @@ out_aio_context:
     return NULL;
 }

+/**
+ * block_dirty_bitmap_lookup:
+ * Return a dirty bitmap (if present), after validating
+ * the node reference and bitmap names.
+ *
+ * @node: The name of the BDS node to search for bitmaps
+ * @name: The name of the bitmap to search for
+ * @pbs: Output pointer for BDS lookup, if desired. Can be NULL.
+ * @paio: Output pointer for aio_context acquisition, if desired. Can be NULL.
+ * @errp: Output pointer for error information. Can be NULL.
+ *
+ * @return: A bitmap object on success, or NULL on failure.
+ */
+static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+                                                  const char *name,
+                                                  BlockDriverState **pbs,
+                                                  AioContext **paio,
+                                                  Error **errp)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+    AioContext *aio_context;
+
+    if (!node) {
+        error_setg(errp, "Node cannot be NULL");
+        return NULL;
+    }
+    if (!name) {
+        error_setg(errp, "Bitmap name cannot be NULL");
+        return NULL;
+    }
+    bs = bdrv_lookup_bs(node, node, NULL);
+    if (!bs) {
+        error_setg(errp, "Node '%s' not found", node);
+        return NULL;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    bitmap = bdrv_find_dirty_bitmap(bs, name);
+    if (!bitmap) {
+        error_setg(errp, "Dirty bitmap '%s' not found", name);
+        goto fail;
+    }
+
+    if (pbs) {
+        *pbs = bs;
+    }
+    if (paio) {
+        *paio = aio_context;
+    } else {
+        aio_context_release(aio_context);
+    }
+
+    return bitmap;
+
+ fail:
+    aio_context_release(aio_context);
+    return NULL;
+}
+
 /* New and old BlockDriverState structs for atomic group operations */

 typedef struct BlkTransactionState BlkTransactionState;
@@ -1248,13 +1310,14 @@ static void internal_snapshot_prepare(BlkTransactionState *common,
     }

     if (bdrv_is_read_only(bs)) {
-        error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
+        error_setg(errp, "Device '%s' is read only", device);
         return;
     }

     if (!bdrv_can_snapshot(bs)) {
-        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  bs->drv->format_name, device, "internal snapshot");
+        error_setg(errp, "Block format '%s' used by device '%s' "
+                   "does not support internal snapshots",
+                   bs->drv->format_name, device);
         return;
     }

@@ -1522,6 +1585,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                      backup->sync,
                      backup->has_mode, backup->mode,
                      backup->has_speed, backup->speed,
+                     backup->has_bitmap, backup->bitmap,
                      backup->has_on_source_error, backup->on_source_error,
                      backup->has_on_target_error, backup->on_target_error,
                      &local_err);
@@ -1953,6 +2017,102 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     aio_context_release(aio_context);
 }

+void qmp_block_dirty_bitmap_add(const char *node, const char *name,
+                                bool has_granularity, uint32_t granularity,
+                                Error **errp)
+{
+    AioContext *aio_context;
+    BlockDriverState *bs;
+
+    if (!name || name[0] == '\0') {
+        error_setg(errp, "Bitmap name cannot be empty");
+        return;
+    }
+
+    bs = bdrv_lookup_bs(node, node, errp);
+    if (!bs) {
+        return;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    if (has_granularity) {
+        if (granularity < 512 || !is_power_of_2(granularity)) {
+            error_setg(errp, "Granularity must be power of 2 "
+                             "and at least 512");
+            goto out;
+        }
+    } else {
+        /* Default to cluster size, if available: */
+        granularity = bdrv_get_default_bitmap_granularity(bs);
+    }
+
+    bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+
+ out:
+    aio_context_release(aio_context);
+}
+
+void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
+                                   Error **errp)
+{
+    AioContext *aio_context;
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+    if (!bitmap || !bs) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently frozen and cannot be removed",
+                   name);
+        goto out;
+    }
+    bdrv_dirty_bitmap_make_anon(bitmap);
+    bdrv_release_dirty_bitmap(bs, bitmap);
+
+ out:
+    aio_context_release(aio_context);
+}
+
+/**
+ * Completely clear a bitmap, for the purposes of synchronizing a bitmap
+ * immediately after a full backup operation.
+ */
+void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
+                                  Error **errp)
+{
+    AioContext *aio_context;
+    BdrvDirtyBitmap *bitmap;
+    BlockDriverState *bs;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+    if (!bitmap || !bs) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently frozen and cannot be modified",
+                   name);
+        goto out;
+    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently disabled and cannot be cleared",
+                   name);
+        goto out;
+    }
+
+    bdrv_clear_dirty_bitmap(bitmap);
+
+ out:
+    aio_context_release(aio_context);
+}
+
 int hmp_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     const char *id = qdict_get_str(qdict, "id");
@@ -2055,7 +2215,7 @@ void qmp_block_resize(bool has_device, const char *device,
         error_set(errp, QERR_UNSUPPORTED);
         break;
     case -EACCES:
-        error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
+        error_setg(errp, "Device '%s' is read only", device);
         break;
     case -EBUSY:
         error_set(errp, QERR_DEVICE_IN_USE, device);
@@ -2270,6 +2430,7 @@ void qmp_drive_backup(const char *device, const char *target,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
+                      bool has_bitmap, const char *bitmap,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
@@ -2278,6 +2439,7 @@ void qmp_drive_backup(const char *device, const char *target,
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     BlockDriverState *source = NULL;
+    BdrvDirtyBitmap *bmap = NULL;
     AioContext *aio_context;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
@@ -2377,7 +2539,16 @@ void qmp_drive_backup(const char *device, const char *target,

     bdrv_set_aio_context(target_bs, aio_context);

-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+    if (has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+            goto out;
+        }
+    }
+
+    backup_start(bs, target_bs, speed, sync, bmap,
+                 on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
@@ -2391,7 +2562,7 @@ out:

 BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 {
-    return bdrv_named_nodes_list();
+    return bdrv_named_nodes_list(errp);
 }

 void qmp_blockdev_backup(const char *device, const char *target,
@@ -2438,8 +2609,8 @@ void qmp_blockdev_backup(const char *device, const char *target,

     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
-                 block_job_cb, bs, &local_err);
+    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
+                 on_target_error, block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
         error_propagate(errp, local_err);
@@ -2699,7 +2870,7 @@ void qmp_block_job_cancel(const char *device,
         force = false;
     }

-    if (job->paused && !force) {
+    if (job->user_paused && !force) {
         error_setg(errp, "The block job for device '%s' is currently paused",
                    device);
         goto out;
@@ -2716,10 +2887,11 @@ void qmp_block_job_pause(const char *device, Error **errp)
     AioContext *aio_context;
     BlockJob *job = find_block_job(device, &aio_context, errp);

-    if (!job) {
+    if (!job || job->user_paused) {
         return;
     }

+    job->user_paused = true;
     trace_qmp_block_job_pause(job);
     block_job_pause(job);
     aio_context_release(aio_context);
@@ -2730,10 +2902,11 @@ void qmp_block_job_resume(const char *device, Error **errp)
     AioContext *aio_context;
     BlockJob *job = find_block_job(device, &aio_context, errp);

-    if (!job) {
+    if (!job || !job->user_paused) {
         return;
     }

+    job->user_paused = false;
     trace_qmp_block_job_resume(job);
     block_job_resume(job);
     aio_context_release(aio_context);

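For reference, the QMP surface added above follows the C signatures directly: block-dirty-bitmap-add takes "node", "name" and an optional "granularity" in bytes (a power of two, at least 512), while block-dirty-bitmap-remove and block-dirty-bitmap-clear take just "node" and "name". As a minimal sketch (the node name drive0 is illustrative): { "execute": "block-dirty-bitmap-add", "arguments": { "node": "drive0", "name": "bitmap0" } }.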
+ 17 - 6
blockjob.c

@@ -107,7 +107,7 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 
 
 void block_job_complete(BlockJob *job, Error **errp)
 void block_job_complete(BlockJob *job, Error **errp)
 {
 {
-    if (job->paused || job->cancelled || !job->driver->complete) {
+    if (job->pause_count || job->cancelled || !job->driver->complete) {
         error_set(errp, QERR_BLOCK_JOB_NOT_READY,
         error_set(errp, QERR_BLOCK_JOB_NOT_READY,
                   bdrv_get_device_name(job->bs));
                   bdrv_get_device_name(job->bs));
         return;
         return;
@@ -118,17 +118,26 @@ void block_job_complete(BlockJob *job, Error **errp)
 
 void block_job_pause(BlockJob *job)
 {
-    job->paused = true;
+    job->pause_count++;
 }
 
 bool block_job_is_paused(BlockJob *job)
 {
-    return job->paused;
+    return job->pause_count > 0;
 }
 
 void block_job_resume(BlockJob *job)
 {
-    job->paused = false;
+    assert(job->pause_count > 0);
+    job->pause_count--;
+    if (job->pause_count) {
+        return;
+    }
+    block_job_enter(job);
+}
+
+void block_job_enter(BlockJob *job)
+{
     block_job_iostatus_reset(job);
     if (job->co && !job->busy) {
         qemu_coroutine_enter(job->co, NULL);
@@ -138,7 +147,7 @@ void block_job_resume(BlockJob *job)
 void block_job_cancel(BlockJob *job)
 {
     job->cancelled = true;
-    block_job_resume(job);
+    block_job_enter(job);
 }
 
 bool block_job_is_cancelled(BlockJob *job)
@@ -258,7 +267,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
     info->device    = g_strdup(bdrv_get_device_name(job->bs));
     info->len       = job->len;
     info->busy      = job->busy;
-    info->paused    = job->paused;
+    info->paused    = job->pause_count > 0;
     info->offset    = job->offset;
     info->speed     = job->speed;
     info->io_status = job->iostatus;
@@ -335,6 +344,8 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
                                     IO_OPERATION_TYPE_WRITE,
                                     action, &error_abort);
     if (action == BLOCK_ERROR_ACTION_STOP) {
+        /* make the pause user visible, which will be resumed from QMP. */
+        job->user_paused = true;
         block_job_pause(job);
         block_job_iostatus_set_err(job, error);
         if (bs != job->bs) {
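
The new pairing semantics are easiest to see in isolation: every block_job_pause() must now be balanced by a block_job_resume(), and only the final resume re-enters the job, which is why block_job_cancel() switches to block_job_enter(). Below is a minimal standalone C model of the patched counter logic; it mirrors the functions above but is not QEMU code:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Standalone model of the patched BlockJob pause semantics. */
typedef struct {
    int pause_count;   /* counter replacing the old 'paused' flag */
    bool user_paused;  /* set only for QMP-initiated pauses */
} Job;

static void job_pause(Job *j)
{
    j->pause_count++;
}

static void job_resume(Job *j)
{
    assert(j->pause_count > 0);
    if (--j->pause_count) {
        return;            /* still paused by another requester */
    }
    printf("job re-enters its coroutine\n");
}

int main(void)
{
    Job j = {0};
    job_pause(&j);          /* e.g. an internal pause */
    job_pause(&j);          /* e.g. a user pause via block-job-pause */
    j.user_paused = true;
    job_resume(&j);         /* first resume: count 2 -> 1, still paused */
    job_resume(&j);         /* second resume: count 1 -> 0, job runs */
    return 0;
}
```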

+ 352 - 0
docs/bitmaps.md

@@ -0,0 +1,352 @@
+<!--
+Copyright 2015 John Snow <jsnow@redhat.com> and Red Hat, Inc.
+All rights reserved.
+
+This file is licensed via The FreeBSD Documentation License, the full text of
+which is included at the end of this document.
+-->
+
+# Dirty Bitmaps and Incremental Backup
+
+* Dirty Bitmaps are objects that track which data needs to be backed up for the
+  next incremental backup.
+
+* Dirty bitmaps can be created at any time and attached to any node
+  (not just complete drives.)
+
+## Dirty Bitmap Names
+
+* A dirty bitmap's name is unique to the node, but bitmaps attached to different
+  nodes can share the same name.
+
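+* For example (an illustrative sketch; both node names are hypothetical),
+  these two commands can both succeed, because each bitmap lives on a
+  different node:
+
+```json
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive0", "name": "bitmap0" } }
+
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive1", "name": "bitmap0" } }
+```
+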
+## Bitmap Modes
+
+* A Bitmap can be "frozen," which means that it is currently in-use by a backup
+  operation and cannot be deleted, renamed, written to, reset,
+  etc.
+
+## Basic QMP Usage
+
+### Supported Commands ###
+
+* block-dirty-bitmap-add
+* block-dirty-bitmap-remove
+* block-dirty-bitmap-clear
+
+### Creation
+
+* To create a new bitmap, enabled, on the drive with id=drive0:
+
+```json
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0"
+  }
+}
+```
+
+* This bitmap will have a default granularity that matches the cluster size of
+  its associated drive, if available, clamped to the range [4KiB, 64KiB].
+  The current default for qcow2 is 64KiB.
+
+* To create a new bitmap that tracks changes in 32KiB segments:
+
+```json
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0",
+    "granularity": 32768
+  }
+}
+```
+
+### Deletion
+
+* Bitmaps that are frozen cannot be deleted.
+
+* Deleting the bitmap does not impact any other bitmaps attached to the same
+  node, nor does it affect any backups already created from this node.
+
+* Because bitmaps are only unique to the node to which they are attached,
+  you must specify the node/drive name here, too.
+
+```json
+{ "execute": "block-dirty-bitmap-remove",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0"
+  }
+}
+```
+
+### Resetting
+
+* Resetting a bitmap will clear all information it holds.
+
+* An incremental backup created from an empty bitmap will copy no data,
+  as if nothing has changed.
+
+```json
+{ "execute": "block-dirty-bitmap-clear",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0"
+  }
+}
+```
+
+## Transactions (Not yet implemented)
+
+* Transactional commands are forthcoming in a future version,
+  and are not yet available for use. This section serves as
+  documentation of intent for their design and usage.
+
+### Justification
+
+Bitmaps can be safely modified when the VM is paused or halted by using
+the basic QMP commands. For instance, you might perform the following actions:
+
+1. Boot the VM in a paused state.
+2. Create a full drive backup of drive0.
+3. Create a new bitmap attached to drive0.
+4. Resume execution of the VM.
+5. Incremental backups are ready to be created.
+
+At this point, the bitmap and drive backup would be correctly in sync,
+and incremental backups made from this point forward would be correctly aligned
+to the full drive backup.
+
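+For example, steps 2 and 3 could be issued as two ordinary QMP commands while
+the VM is still paused (a sketch; the target path is illustrative):
+
+```json
+{ "execute": "drive-backup",
+  "arguments": { "device": "drive0", "target": "/path/to/full_backup.img",
+                 "sync": "full", "format": "qcow2" } }
+
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive0", "name": "bitmap0" } }
+```
+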
+This is not particularly useful if we decide we want to start incremental
+backups after the VM has been running for a while, for which we will need to
+perform actions such as the following:
+
+1. Boot the VM and begin execution.
+2. Using a single transaction, perform the following operations:
+    * Create bitmap0.
+    * Create a full drive backup of drive0.
+3. Incremental backups are now ready to be created.
+
+### Supported Bitmap Transactions
+
+* block-dirty-bitmap-add
+* block-dirty-bitmap-clear
+
+The usages are identical to their respective QMP commands, but see below
+for examples.
+
+### Example: New Incremental Backup
+
+As outlined in the justification, perhaps we want to create a new incremental
+backup chain attached to a drive.
+
+```json
+{ "execute": "transaction",
+  "arguments": {
+    "actions": [
+      {"type": "block-dirty-bitmap-add",
+       "data": {"node": "drive0", "name": "bitmap0"} },
+      {"type": "drive-backup",
+       "data": {"device": "drive0", "target": "/path/to/full_backup.img",
+                "sync": "full", "format": "qcow2"} }
+    ]
+  }
+}
+```
+
+### Example: New Incremental Backup Anchor Point
+
+Maybe we just want to create a new full backup with an existing bitmap and
+want to reset the bitmap to track the new chain.
+
+```json
+{ "execute": "transaction",
+  "arguments": {
+    "actions": [
+      {"type": "block-dirty-bitmap-clear",
+       "data": {"node": "drive0", "name": "bitmap0"} },
+      {"type": "drive-backup",
+       "data": {"device": "drive0", "target": "/path/to/new_full_backup.img",
+                "sync": "full", "format": "qcow2"} }
+    ]
+  }
+}
+```
+
+## Incremental Backups
+
+The star of the show.
+
+**Nota Bene!** Only incremental backups of entire drives are supported for now.
+So although a bitmap can be attached to any arbitrary node, it is currently
+only useful when attached to the root node. This is because drive-backup only
+supports drives/devices instead of arbitrary nodes.
+
+### Example: First Incremental Backup
+
+1. Create a full backup and sync it to the dirty bitmap, as in the transactional
+examples above; or with the VM offline, manually create a full copy and then
+create a new bitmap before the VM begins execution.
+
+    * Let's assume the full backup is named 'full_backup.img'.
+    * Let's assume the bitmap you created is 'bitmap0' attached to 'drive0'.
+
+2. Create a destination image for the incremental backup that utilizes the
+full backup as a backing image.
+
+    * Let's assume it is named 'incremental.0.img'.
+
+    ```sh
+    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+    ```
+
+3. Issue the incremental backup command:
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.0.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+### Example: Second Incremental Backup
+
+1. Create a new destination image for the incremental backup that points to the
+   previous one, e.g.: 'incremental.1.img'
+
+    ```sh
+    # qemu-img create -f qcow2 incremental.1.img -b incremental.0.img -F qcow2
+    ```
+
+2. Issue a new incremental backup command. The only difference here is that we
+   have changed the target image below.
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.1.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+## Errors
+
+* In the event of an error that occurs after a backup job is successfully
+  launched, either by a direct QMP command or a QMP transaction, the user
+  will receive a BLOCK_JOB_COMPLETE event with a failure message, accompanied
+  by a BLOCK_JOB_ERROR event.
+
+* In the case of the job being cancelled, the user will receive a
+  BLOCK_JOB_CANCELLED event instead of a pair of COMPLETE and ERROR events.
+
+* In either case, the incremental backup data contained within the bitmap is
+  safely rolled back, and the data within the bitmap is not lost. The image
+  file created for the failed attempt can be safely deleted.
+
+* Once the underlying problem is fixed (e.g. more storage space is freed up),
+  you can simply retry the incremental backup command with the same bitmap.
+
+### Example
+
+1. Create a target image:
+
+    ```sh
+    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+    ```
+
+2. Attempt to create an incremental backup via QMP:
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.0.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+3. Receive an event notifying us of failure:
+
+    ```json
+    { "timestamp": { "seconds": 1424709442, "microseconds": 844524 },
+      "data": { "speed": 0, "offset": 0, "len": 67108864,
+                "error": "No space left on device",
+                "device": "drive0", "type": "backup" },
+      "event": "BLOCK_JOB_COMPLETED" }
+    ```
+
+4. Delete the failed incremental, and re-create the image.
+
+    ```sh
+    # rm incremental.0.img
+    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+    ```
+
+5. Retry the command after fixing the underlying problem,
+   such as freeing up space on the backup volume:
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.0.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+6. Receive confirmation that the job completed successfully:
+
+    ```json
+    { "timestamp": { "seconds": 1424709668, "microseconds": 526525 },
+      "data": { "device": "drive0", "type": "backup",
+                "speed": 0, "len": 67108864, "offset": 67108864},
+      "event": "BLOCK_JOB_COMPLETED" }
+    ```
+
+<!--
+The FreeBSD Documentation License
+
+Redistribution and use in source (Markdown) and 'compiled' forms (SGML, HTML,
+PDF, PostScript, RTF and so forth) with or without modification, are permitted
+provided that the following conditions are met:
+
+Redistributions of source code (Markdown) must retain the above copyright
+notice, this list of conditions and the following disclaimer of this file
+unmodified.
+
+Redistributions in compiled form (transformed to other DTDs, converted to PDF,
+PostScript, RTF and other formats) must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS  BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+-->

+ 13 - 8
docs/qmp/qmp-events.txt

@@ -31,21 +31,26 @@ Example:
 BLOCK_IMAGE_CORRUPTED
 ---------------------
 
-Emitted when a disk image is being marked corrupt.
+Emitted when a disk image is being marked corrupt. The image can be
+identified by its device or node name. The 'device' field is always
+present for compatibility reasons, but it can be empty ("") if the
+image does not have a device name associated.
 
 Data:
 
-- "device": Device name (json-string)
-- "msg":    Informative message (e.g., reason for the corruption) (json-string)
-- "offset": If the corruption resulted from an image access, this is the access
-            offset into the image (json-int)
-- "size":   If the corruption resulted from an image access, this is the access
-            size (json-int)
+- "device":    Device name (json-string)
+- "node-name": Node name (json-string, optional)
+- "msg":       Informative message (e.g., reason for the corruption)
+               (json-string)
+- "offset":    If the corruption resulted from an image access, this
+               is the access offset into the image (json-int)
+- "size":      If the corruption resulted from an image access, this
+               is the access size (json-int)
 
 Example:
 
 { "event": "BLOCK_IMAGE_CORRUPTED",
-    "data": { "device": "ide0-hd0",
+    "data": { "device": "ide0-hd0", "node-name": "node0",
         "msg": "Prevented active L1 table overwrite", "offset": 196608,
         "size": 65536 },
     "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }

+ 3 - 3
hmp.c

@@ -391,8 +391,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
                         inserted->iops_size);
     }
 
-    /* TODO: inserted->image should never be null */
-    if (verbose && inserted->image) {
+    if (verbose) {
         monitor_printf(mon, "\nImages:\n");
         image_info = inserted->image;
         while (1) {
@@ -1062,7 +1061,8 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
 
     qmp_drive_backup(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0, false, 0, &err);
+                     true, mode, false, 0, false, NULL,
+                     false, 0, false, 0, &err);
     hmp_handle_error(mon, &err);
 }
 

+ 1 - 1
hw/acpi/pcihp.c

@@ -120,7 +120,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev)
 static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots)
 {
     BusChild *kid, *next;
-    int slot = ffs(slots) - 1;
+    int slot = ctz32(slots);
     PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel);
 
     if (!bus) {
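
The same conversion recurs in every hardware model below: `ffs(val) - 1` becomes `ctz32(val)`. The two expressions agree for all non-zero inputs but diverge at zero (`ffs(0)` is 0, so the old idiom yields -1, while `ctz32(0)` yields 32), which is why several call sites gain explicit `== 32` / `!= 32` checks. A standalone sketch of the equivalence, where `my_ctz32` is a stand-in for QEMU's `ctz32` and is assumed to return 32 for input 0:

```c
#include <assert.h>
#include <stdint.h>
#include <strings.h>   /* POSIX ffs() */

/* Stand-in for QEMU's ctz32(): count trailing zeros, 32 for input 0. */
static int my_ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

int main(void)
{
    for (uint32_t v = 1; v < 1000; v++) {
        assert(ffs(v) - 1 == my_ctz32(v));  /* identical for non-zero input */
    }
    assert(ffs(0) - 1 == -1);    /* old idiom: -1 on zero */
    assert(my_ctz32(0) == 32);   /* new idiom: 32 on zero, hence the checks */
    return 0;
}
```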

+ 4 - 1
hw/arm/nseries.c

@@ -579,7 +579,10 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
 
     case 0x26:	/* GAMSET */
         if (!s->pm) {
-            s->gamma = ffs(s->param[0] & 0xf) - 1;
+            s->gamma = ctz32(s->param[0] & 0xf);
+            if (s->gamma == 32) {
+                s->gamma = -1; /* XXX: should this be 0? */
+            }
         } else if (s->pm < 0) {
             s->pm = 1;
         }

+ 2 - 4
hw/arm/omap1.c

@@ -2004,8 +2004,7 @@ static void omap_mpuio_write(void *opaque, hwaddr addr,
     case 0x04:	/* OUTPUT_REG */
         diff = (s->outputs ^ value) & ~s->dir;
         s->outputs = value;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
@@ -2017,8 +2016,7 @@ static void omap_mpuio_write(void *opaque, hwaddr addr,
         s->dir = value;
 
         value = s->outputs & ~s->dir;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);

+ 1 - 1
hw/arm/pxa2xx_gpio.c

@@ -137,7 +137,7 @@ static void pxa2xx_gpio_handler_update(PXA2xxGPIOInfo *s) {
         level = s->olevel[i] & s->dir[i];
 
         for (diff = s->prev_level[i] ^ level; diff; diff ^= 1 << bit) {
-            bit = ffs(diff) - 1;
+            bit = ctz32(diff);
             line = bit + 32 * i;
             qemu_set_irq(s->handler[line], (level >> bit) & 1);
         }

+ 2 - 2
hw/arm/strongarm.c

@@ -528,7 +528,7 @@ static void strongarm_gpio_handler_update(StrongARMGPIOInfo *s)
     level = s->olevel & s->dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 
@@ -745,7 +745,7 @@ static void strongarm_ppc_handler_update(StrongARMPPCInfo *s)
     level = s->olevel & s->dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 

+ 4 - 1
hw/block/m25p80.c

@@ -621,7 +621,6 @@ static int m25p80_init(SSISlave *ss)
 
     s->size = s->pi->sector_size * s->pi->n_sectors;
     s->dirty_page = -1;
-    s->storage = blk_blockalign(s->blk, s->size);
 
     /* FIXME use a qdev drive property instead of drive_get_next() */
     dinfo = drive_get_next(IF_MTD);
@@ -629,6 +628,9 @@ static int m25p80_init(SSISlave *ss)
     if (dinfo) {
         DB_PRINT_L(0, "Binding to IF_MTD drive\n");
         s->blk = blk_by_legacy_dinfo(dinfo);
+        blk_attach_dev_nofail(s->blk, s);
+
+        s->storage = blk_blockalign(s->blk, s->size);
 
         /* FIXME: Move to late init */
         if (blk_read(s->blk, 0, s->storage,
@@ -638,6 +640,7 @@ static int m25p80_init(SSISlave *ss)
         }
     } else {
         DB_PRINT_L(0, "No BDRV - binding to RAM\n");
+        s->storage = blk_blockalign(NULL, s->size);
         memset(s->storage, 0xFF, s->size);
     }
 

+ 1 - 1
hw/bt/sdp.c

@@ -707,7 +707,7 @@ static void sdp_service_record_build(struct sdp_service_record_s *record,
         len += sdp_attr_max_size(&def->attributes[record->attributes ++].data,
                         &record->uuids);
     }
-    record->uuids = 1 << ffs(record->uuids - 1);
+    record->uuids = pow2ceil(record->uuids);
     record->attribute_list =
             g_malloc0(record->attributes * sizeof(*record->attribute_list));
     record->uuid =

+ 4 - 4
hw/char/virtio-serial-bus.c

@@ -814,12 +814,12 @@ static uint32_t find_free_port_id(VirtIOSerial *vser)
 
     max_nr_ports = vser->serial.max_virtserial_ports;
     for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
-        uint32_t map, bit;
+        uint32_t map, zeroes;
 
         map = vser->ports_map[i];
-        bit = ffs(~map);
-        if (bit) {
-            return (bit - 1) + i * 32;
+        zeroes = ctz32(~map);
+        if (zeroes != 32) {
+            return zeroes + i * 32;
         }
     }
     return VIRTIO_CONSOLE_BAD_ID;

+ 1 - 1
hw/display/tc6393xb.c

@@ -171,7 +171,7 @@ static void tc6393xb_gpio_handler_update(TC6393xbState *s)
     level = s->gpio_level & s->gpio_dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 

+ 1 - 1
hw/gpio/max7310.c

@@ -96,7 +96,7 @@ static int max7310_tx(I2CSlave *i2c, uint8_t data)
     case 0x01:	/* Output port */
         for (diff = (data ^ s->level) & ~s->direction; diff;
                         diff &= ~(1 << line)) {
-            line = ffs(diff) - 1;
+            line = ctz32(diff);
             if (s->handler[line])
                 qemu_set_irq(s->handler[line], (data >> line) & 1);
         }

+ 5 - 8
hw/gpio/omap_gpio.c

@@ -125,8 +125,7 @@ static void omap_gpio_write(void *opaque, hwaddr addr,
     case 0x04:	/* DATA_OUTPUT */
         diff = (s->outputs ^ value) & ~s->dir;
         s->outputs = value;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
@@ -138,8 +137,7 @@ static void omap_gpio_write(void *opaque, hwaddr addr,
         s->dir = value;
 
         value = s->outputs & ~s->dir;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
@@ -253,8 +251,7 @@ static inline void omap2_gpio_module_out_update(struct omap2_gpio_s *s,
 
     s->outputs ^= diff;
     diff &= ~s->dir;
-    while ((ln = ffs(diff))) {
-        ln --;
+    while ((ln = ctz32(diff)) != 32) {
         qemu_set_irq(s->handler[ln], (s->outputs >> ln) & 1);
         diff &= ~(1 << ln);
     }
@@ -442,8 +439,7 @@ static void omap2_gpio_module_write(void *opaque, hwaddr addr,
         s->dir = value;
 
         value = s->outputs & ~s->dir;
-        while ((ln = ffs(diff))) {
-            diff &= ~(1 <<-- ln);
+        while ((ln = ctz32(diff)) != 32) {
+            diff &= ~(1 << ln);
             qemu_set_irq(s->handler[ln], (value >> ln) & 1);
         }
 

+ 1 - 1
hw/gpio/zaurus.c

@@ -65,7 +65,7 @@ static inline void scoop_gpio_handler_update(ScoopInfo *s) {
     level = s->gpio_level & s->gpio_dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 

+ 7 - 3
hw/i2c/omap_i2c.c

@@ -171,9 +171,13 @@ static uint32_t omap_i2c_read(void *opaque, hwaddr addr)
     case 0x0c:	/* I2C_IV */
         if (s->revision >= OMAP2_INTR_REV)
             break;
-        ret = ffs(s->stat & s->mask);
-        if (ret)
-            s->stat ^= 1 << (ret - 1);
+        ret = ctz32(s->stat & s->mask);
+        if (ret != 32) {
+            s->stat ^= 1 << ret;
+            ret++;
+        } else {
+            ret = 0;
+        }
         omap_i2c_interrupts_update(s);
         return ret;
 

+ 4 - 4
hw/intc/allwinner-a10-pic.c

@@ -23,7 +23,7 @@
 static void aw_a10_pic_update(AwA10PICState *s)
 {
     uint8_t i;
-    int irq = 0, fiq = 0, pending;
+    int irq = 0, fiq = 0, zeroes;
 
     s->vector = 0;
 
@@ -32,9 +32,9 @@ static void aw_a10_pic_update(AwA10PICState *s)
         fiq |= s->select[i] & s->irq_pending[i] & ~s->mask[i];
 
         if (!s->vector) {
-            pending = ffs(s->irq_pending[i] & ~s->mask[i]);
-            if (pending) {
-                s->vector = (i * 32 + pending - 1) * 4;
+            zeroes = ctz32(s->irq_pending[i] & ~s->mask[i]);
+            if (zeroes != 32) {
+                s->vector = (i * 32 + zeroes) * 4;
             }
         }
     }

+ 5 - 4
hw/intc/omap_intc.c

@@ -60,7 +60,7 @@ struct omap_intr_handler_s {
 
 static void omap_inth_sir_update(struct omap_intr_handler_s *s, int is_fiq)
 {
-    int i, j, sir_intr, p_intr, p, f;
+    int i, j, sir_intr, p_intr, p;
     uint32_t level;
     sir_intr = 0;
     p_intr = 255;
@@ -72,14 +72,15 @@ static void omap_inth_sir_update(struct omap_intr_handler_s *s, int is_fiq)
     for (j = 0; j < s->nbanks; ++j) {
         level = s->bank[j].irqs & ~s->bank[j].mask &
                 (is_fiq ? s->bank[j].fiq : ~s->bank[j].fiq);
-        for (f = ffs(level), i = f - 1, level >>= f - 1; f; i += f,
-                        level >>= f) {
+
+        while (level != 0) {
+            i = ctz32(level);
             p = s->bank[j].priority[i];
             if (p <= p_intr) {
                 p_intr = p;
                 sir_intr = 32 * j + i;
             }
-            f = ffs(level >> 1);
+            level &= level - 1;
         }
     }
     s->sir_intr[is_fiq] = sir_intr;

+ 1 - 1
hw/pci-host/bonito.c

@@ -427,7 +427,7 @@ static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr addr)
     cfgaddr |= (s->regs[BONITO_PCIMAP_CFG] & 0xffff) << 16;
 
     idsel = (cfgaddr & BONITO_PCICONF_IDSEL_MASK) >> BONITO_PCICONF_IDSEL_OFFSET;
-    devno = ffs(idsel) - 1;
+    devno = ctz32(idsel);
     funno = (cfgaddr & BONITO_PCICONF_FUN_MASK) >> BONITO_PCICONF_FUN_OFFSET;
     regno = (cfgaddr & BONITO_PCICONF_REG_MASK) >> BONITO_PCICONF_REG_OFFSET;
 

+ 4 - 1
hw/pci-host/uninorth.c

@@ -92,7 +92,10 @@ static uint32_t unin_get_config_reg(uint32_t reg, uint32_t addr)
         uint32_t slot, func;
 
         /* Grab CFA0 style values */
-        slot = ffs(reg & 0xfffff800) - 1;
+        slot = ctz32(reg & 0xfffff800);
+        if (slot == 32) {
+            slot = -1; /* XXX: should this be 0? */
+        }
         func = (reg >> 8) & 7;
 
         /* ... and then convert them to x86 format */

+ 6 - 6
hw/pci/msi.c

@@ -72,7 +72,7 @@ static inline uint8_t msi_cap_sizeof(uint16_t flags)
 static inline unsigned int msi_nr_vectors(uint16_t flags)
 {
     return 1U <<
-        ((flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1));
+        ((flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE));
 }
 
 static inline uint8_t msi_flags_off(const PCIDevice* dev)
@@ -175,9 +175,9 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
     assert(nr_vectors > 0);
     assert(nr_vectors <= PCI_MSI_VECTORS_MAX);
     /* the nr of MSI vectors is up to 32 */
-    vectors_order = ffs(nr_vectors) - 1;
+    vectors_order = ctz32(nr_vectors);
 
-    flags = vectors_order << (ffs(PCI_MSI_FLAGS_QMASK) - 1);
+    flags = vectors_order << ctz32(PCI_MSI_FLAGS_QMASK);
     if (msi64bit) {
         flags |= PCI_MSI_FLAGS_64BIT;
     }
@@ -355,12 +355,12 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
      * just don't crash the host
      */
     log_num_vecs =
-        (flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1);
+        (flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE);
     log_max_vecs =
-        (flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1);
+        (flags & PCI_MSI_FLAGS_QMASK) >> ctz32(PCI_MSI_FLAGS_QMASK);
     if (log_num_vecs > log_max_vecs) {
         flags &= ~PCI_MSI_FLAGS_QSIZE;
-        flags |= log_max_vecs << (ffs(PCI_MSI_FLAGS_QSIZE) - 1);
+        flags |= log_max_vecs << ctz32(PCI_MSI_FLAGS_QSIZE);
         pci_set_word(dev->config + msi_flags_off(dev), flags);
     }
 

+ 1 - 1
hw/pci/pcie_aer.c

@@ -410,7 +410,7 @@ static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
 {
     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
-    uint8_t first_bit = ffs(err->status) - 1;
+    uint8_t first_bit = ctz32(err->status);
     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
     int i;
 

+ 5 - 5
hw/pci/shpc.c

@@ -61,7 +61,7 @@
 /* Same slot state masks are used for command and status registers */
 #define SHPC_SLOT_STATE_MASK     0x03
 #define SHPC_SLOT_STATE_SHIFT \
-    (ffs(SHPC_SLOT_STATE_MASK) - 1)
+    ctz32(SHPC_SLOT_STATE_MASK)
 
 #define SHPC_STATE_NO       0x0
 #define SHPC_STATE_PWRONLY  0x1
@@ -70,10 +70,10 @@
 
 #define SHPC_SLOT_PWR_LED_MASK   0xC
 #define SHPC_SLOT_PWR_LED_SHIFT \
-    (ffs(SHPC_SLOT_PWR_LED_MASK) - 1)
+    ctz32(SHPC_SLOT_PWR_LED_MASK)
 #define SHPC_SLOT_ATTN_LED_MASK  0x30
 #define SHPC_SLOT_ATTN_LED_SHIFT \
-    (ffs(SHPC_SLOT_ATTN_LED_MASK) - 1)
+    ctz32(SHPC_SLOT_ATTN_LED_MASK)
 
 #define SHPC_LED_NO     0x0
 #define SHPC_LED_ON     0x1
@@ -136,7 +136,7 @@ static int roundup_pow_of_two(int x)
 static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk)
 {
     uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
-    return (pci_get_word(status) & msk) >> (ffs(msk) - 1);
+    return (pci_get_word(status) & msk) >> ctz32(msk);
 }
 
 static void shpc_set_status(SHPCDevice *shpc,
@@ -144,7 +144,7 @@ static void shpc_set_status(SHPCDevice *shpc,
 {
     uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
     pci_word_test_and_clear_mask(status, msk);
-    pci_word_test_and_set_mask(status, value << (ffs(msk) - 1));
+    pci_word_test_and_set_mask(status, value << ctz32(msk));
 }
 
 static void shpc_interrupt_update(PCIDevice *d)

+ 1 - 1
hw/pci/slotid_cap.c

@@ -3,7 +3,7 @@
 #include "qemu/error-report.h"
 
 #define SLOTID_CAP_LENGTH 4
-#define SLOTID_NSLOTS_SHIFT (ffs(PCI_SID_ESR_NSLOTS) - 1)
+#define SLOTID_NSLOTS_SHIFT ctz32(PCI_SID_ESR_NSLOTS)
 
 int slotid_cap_init(PCIDevice *d, int nslots,
                     uint8_t chassis,

+ 1 - 1
hw/ppc/ppce500_spin.c

@@ -74,7 +74,7 @@ static void spin_reset(void *opaque)
 /* Create -kernel TLB entries for BookE, linearly spanning 256MB.  */
 static inline hwaddr booke206_page_size_to_tlb(uint64_t size)
 {
-    return (ffs(size >> 10) - 1) >> 1;
+    return ctz32(size >> 10) >> 1;
 }
 
 static void mmubooke_create_initial_mapping(CPUPPCState *env,

+ 1 - 1
hw/scsi/megasas.c

@@ -804,7 +804,7 @@ static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd)
                                MFI_INFO_LDOPS_READ_POLICY);
     info.max_strips_per_io = cpu_to_le16(s->fw_sge);
     info.stripe_sz_ops.min = 3;
-    info.stripe_sz_ops.max = ffs(MEGASAS_MAX_SECTORS + 1) - 1;
+    info.stripe_sz_ops.max = ctz32(MEGASAS_MAX_SECTORS + 1);
     info.properties.pred_fail_poll_interval = cpu_to_le16(300);
     info.properties.intr_throttle_cnt = cpu_to_le16(16);
     info.properties.intr_throttle_timeout = cpu_to_le16(50);

+ 2 - 1
hw/sd/sd.c

@@ -796,8 +796,9 @@ static sd_rsp_type_t sd_normal_command(SDState *sd,
             sd->vhs = 0;
 
             /* No response if not exactly one VHS bit is set.  */
-            if (!(req.arg >> 8) || (req.arg >> ffs(req.arg & ~0xff)))
+            if (!(req.arg >> 8) || (req.arg >> (ctz32(req.arg & ~0xff) + 1))) {
                 return sd->spi ? sd_r7 : sd_r0;
+            }
 
             /* Accept.  */
             sd->vhs = req.arg;

+ 7 - 9
include/block/aio.h

@@ -82,9 +82,6 @@ struct AioContext {
     /* Used for aio_notify.  */
     EventNotifier notifier;
 
-    /* GPollFDs for aio_poll() */
-    GArray *pollfds;
-
     /* Thread pool for performing work and receiving completion callbacks */
     struct ThreadPool *thread_pool;
 
@@ -121,13 +118,14 @@ void aio_context_ref(AioContext *ctx);
 void aio_context_unref(AioContext *ctx);
 
 /* Take ownership of the AioContext.  If the AioContext will be shared between
- * threads, a thread must have ownership when calling aio_poll().
+ * threads, and a thread does not want to be interrupted, it will have to
+ * take ownership around calls to aio_poll().  Otherwise, aio_poll()
+ * automatically takes care of calling aio_context_acquire and
+ * aio_context_release.
 *
- * Note that multiple threads calling aio_poll() means timers, BHs, and
- * callbacks may be invoked from a different thread than they were registered
- * from.  Therefore, code must use AioContext acquire/release or use
- * fine-grained synchronization to protect shared state if other threads will
- * be accessing it simultaneously.
+ * Access to timers and BHs from a thread that has not acquired AioContext
+ * is possible.  Access to callbacks for now must be done while the AioContext
+ * is owned by the thread (FIXME).
 */
 void aio_context_acquire(AioContext *ctx);
 
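A usage sketch of the relaxed ownership contract described in the comment above. This is illustrative only: it uses the in-tree API, compiles only inside the QEMU source tree, and the function name is invented:

```c
#include "block/aio.h"

/* Illustrative: run this context's ready handlers from the current thread,
 * holding ownership so no other thread's aio_poll() dispatches them. */
static void flush_ready_handlers(AioContext *ctx)
{
    aio_context_acquire(ctx);          /* take ownership explicitly */
    while (aio_poll(ctx, false)) {
        /* non-blocking: keep dispatching until no progress is made */
    }
    aio_context_release(ctx);
}
```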

+ 29 - 7
include/block/block.h

@@ -382,7 +382,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
 const char *bdrv_get_format_name(BlockDriverState *bs);
 BlockDriverState *bdrv_find_node(const char *node_name);
-BlockDeviceInfoList *bdrv_named_nodes_list(void);
+BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp);
 BlockDriverState *bdrv_lookup_bs(const char *device,
                                  const char *node_name,
                                  Error **errp);
@@ -398,6 +398,7 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque);
 const char *bdrv_get_node_name(const BlockDriverState *bs);
 const char *bdrv_get_device_name(const BlockDriverState *bs);
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
 int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors);
@@ -449,18 +450,39 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
 struct HBitmapIter;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
                                           Error **errp);
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap,
+                                       Error **errp);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp);
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *bitmap,
+                                           Error **errp);
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
+                                        const char *name);
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                            int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BlockDriverState *bs,
-                          BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
-int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
 void bdrv_disable_copy_on_read(BlockDriverState *bs);
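
Taken together, the new prototypes suggest a create/track/query flow along these lines. This is a sketch against the in-tree API, only meaningful inside QEMU; the granularity value and bitmap name are illustrative:

```c
#include "block/block.h"

/* Illustrative: create a named bitmap and report how much it has tracked. */
static int64_t track_writes(BlockDriverState *bs, Error **errp)
{
    BdrvDirtyBitmap *bitmap;

    bitmap = bdrv_create_dirty_bitmap(bs, 65536 /* granularity */,
                                      "bitmap0", errp);
    if (!bitmap) {
        return -1;
    }
    /* ... guest writes happen; the I/O path marks sectors dirty ... */
    return bdrv_get_dirty_count(bitmap);  /* note: no bs argument anymore */
}
```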

+ 15 - 1
include/block/block_int.h

@@ -439,6 +439,14 @@ extern BlockDriver bdrv_file;
 extern BlockDriver bdrv_raw;
 extern BlockDriver bdrv_qcow2;
 
+/**
+ * bdrv_setup_io_funcs:
+ *
+ * Prepare a #BlockDriver for I/O request processing by populating
+ * unimplemented coroutine and AIO interfaces with generic wrapper functions
+ * that fall back to implemented interfaces.
+ */
+void bdrv_setup_io_funcs(BlockDriver *bdrv);
 
 int get_tmp_filename(char *filename, int size);
 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
@@ -590,7 +598,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   const char *replaces,
-                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  int64_t speed, uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb,
@@ -602,6 +610,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @cb: Completion function for the job.
@@ -612,6 +621,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  */
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
@@ -624,4 +634,8 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 bool blk_dev_is_medium_locked(BlockBackend *blk);
 void blk_dev_resize_cb(BlockBackend *blk);
 
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                      int nr_sectors);
+
 #endif /* BLOCK_INT_H */

+ 18 - 4
include/block/blockjob.h

@@ -79,10 +79,16 @@ struct BlockJob {
     bool cancelled;
 
     /**
-     * Set to true if the job is either paused, or will pause itself
-     * as soon as possible (if busy == true).
+     * Counter for pause request. If non-zero, the block job is either paused,
+     * or if busy == true will pause itself as soon as possible.
      */
-    bool paused;
+    int pause_count;
+
+    /**
+     * Set to true if the job is paused by user.  Can be unpaused with the
+     * block-job-resume QMP command.
+     */
+    bool user_paused;
 
     /**
      * Set to false by the job while it is in a quiescent state, where
@@ -225,10 +231,18 @@ void block_job_pause(BlockJob *job);
  * block_job_resume:
  * @job: The job to be resumed.
  *
- * Resume the specified job.
+ * Resume the specified job.  Must be paired with a preceding block_job_pause.
  */
 void block_job_resume(BlockJob *job);
 
+/**
+ * block_job_enter:
+ * @job: The job to enter.
+ *
+ * Continue the specified job by entering the coroutine.
+ */
+void block_job_enter(BlockJob *job);
+
 /**
  * block_job_event_cancelled:
  * @job: The job whose information is requested.

+ 1 - 1
include/block/qapi.h

@@ -29,7 +29,7 @@
 #include "block/block.h"
 #include "block/snapshot.h"
 
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs);
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp);
 int bdrv_query_snapshot_info_list(BlockDriverState *bs,
                                   SnapshotInfoList **p_list,
                                   Error **errp);

+ 8 - 8
include/hw/pci/pci.h

@@ -568,7 +568,7 @@ static inline void
 pci_set_byte_by_mask(uint8_t *config, uint8_t mask, uint8_t reg)
 {
     uint8_t val = pci_get_byte(config);
-    uint8_t rval = reg << (ffs(mask) - 1);
+    uint8_t rval = reg << ctz32(mask);
     pci_set_byte(config, (~mask & val) | (mask & rval));
 }
 
@@ -576,14 +576,14 @@ static inline uint8_t
 pci_get_byte_by_mask(uint8_t *config, uint8_t mask)
 {
     uint8_t val = pci_get_byte(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 static inline void
 pci_set_word_by_mask(uint8_t *config, uint16_t mask, uint16_t reg)
 {
     uint16_t val = pci_get_word(config);
-    uint16_t rval = reg << (ffs(mask) - 1);
+    uint16_t rval = reg << ctz32(mask);
     pci_set_word(config, (~mask & val) | (mask & rval));
 }
 
@@ -591,14 +591,14 @@ static inline uint16_t
 pci_get_word_by_mask(uint8_t *config, uint16_t mask)
 {
     uint16_t val = pci_get_word(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 static inline void
 pci_set_long_by_mask(uint8_t *config, uint32_t mask, uint32_t reg)
 {
     uint32_t val = pci_get_long(config);
-    uint32_t rval = reg << (ffs(mask) - 1);
+    uint32_t rval = reg << ctz32(mask);
     pci_set_long(config, (~mask & val) | (mask & rval));
 }
 
@@ -606,14 +606,14 @@ static inline uint32_t
 pci_get_long_by_mask(uint8_t *config, uint32_t mask)
 {
     uint32_t val = pci_get_long(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 static inline void
 pci_set_quad_by_mask(uint8_t *config, uint64_t mask, uint64_t reg)
 {
     uint64_t val = pci_get_quad(config);
-    uint64_t rval = reg << (ffs(mask) - 1);
+    uint64_t rval = reg << ctz32(mask);
     pci_set_quad(config, (~mask & val) | (mask & rval));
 }
 
@@ -621,7 +621,7 @@ static inline uint64_t
 pci_get_quad_by_mask(uint8_t *config, uint64_t mask)
 {
     uint64_t val = pci_get_quad(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction,
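
All of the by-mask helpers above rely on the same idiom: a field occupies the contiguous set bits of `mask`, so `ctz32(mask)` is the field's shift. A standalone model of the word-sized pair (the mask value is arbitrary; `ctz32_model` stands in for QEMU's `ctz32`):

```c
#include <assert.h>
#include <stdint.h>

static int ctz32_model(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/* Model of pci_set/get_word_by_mask(): write then read back a field. */
int main(void)
{
    uint16_t config = 0;
    const uint16_t mask = 0x0070;   /* 3-bit field at bits 4..6 */

    /* set: shift the value into place, then merge under the mask */
    config = (uint16_t)((config & ~mask) | ((5u << ctz32_model(mask)) & mask));

    /* get: mask out the field, then shift it back down */
    assert(((config & mask) >> ctz32_model(mask)) == 5);
    return 0;
}
```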

+ 9 - 9
include/hw/pci/pcie_regs.h

@@ -27,34 +27,34 @@
 
 /* PCI_EXP_FLAGS */
 #define PCI_EXP_FLAGS_VER2              2 /* for now, supports only ver. 2 */
-#define PCI_EXP_FLAGS_IRQ_SHIFT         (ffs(PCI_EXP_FLAGS_IRQ) - 1)
-#define PCI_EXP_FLAGS_TYPE_SHIFT        (ffs(PCI_EXP_FLAGS_TYPE) - 1)
+#define PCI_EXP_FLAGS_IRQ_SHIFT         ctz32(PCI_EXP_FLAGS_IRQ)
+#define PCI_EXP_FLAGS_TYPE_SHIFT        ctz32(PCI_EXP_FLAGS_TYPE)
 
 
 /* PCI_EXP_LINK{CAP, STA} */
 /* link speed */
 #define PCI_EXP_LNK_LS_25               1
 
-#define PCI_EXP_LNK_MLW_SHIFT           (ffs(PCI_EXP_LNKCAP_MLW) - 1)
+#define PCI_EXP_LNK_MLW_SHIFT           ctz32(PCI_EXP_LNKCAP_MLW)
 #define PCI_EXP_LNK_MLW_1               (1 << PCI_EXP_LNK_MLW_SHIFT)
 
 /* PCI_EXP_LINKCAP */
-#define PCI_EXP_LNKCAP_ASPMS_SHIFT      (ffs(PCI_EXP_LNKCAP_ASPMS) - 1)
+#define PCI_EXP_LNKCAP_ASPMS_SHIFT      ctz32(PCI_EXP_LNKCAP_ASPMS)
 #define PCI_EXP_LNKCAP_ASPMS_0S         (1 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
 
-#define PCI_EXP_LNKCAP_PN_SHIFT         (ffs(PCI_EXP_LNKCAP_PN) - 1)
+#define PCI_EXP_LNKCAP_PN_SHIFT         ctz32(PCI_EXP_LNKCAP_PN)
 
-#define PCI_EXP_SLTCAP_PSN_SHIFT        (ffs(PCI_EXP_SLTCAP_PSN) - 1)
+#define PCI_EXP_SLTCAP_PSN_SHIFT        ctz32(PCI_EXP_SLTCAP_PSN)
 
 #define PCI_EXP_SLTCTL_IND_RESERVED     0x0
 #define PCI_EXP_SLTCTL_IND_ON           0x1
 #define PCI_EXP_SLTCTL_IND_BLINK        0x2
 #define PCI_EXP_SLTCTL_IND_OFF          0x3
-#define PCI_EXP_SLTCTL_AIC_SHIFT        (ffs(PCI_EXP_SLTCTL_AIC) - 1)
+#define PCI_EXP_SLTCTL_AIC_SHIFT        ctz32(PCI_EXP_SLTCTL_AIC)
 #define PCI_EXP_SLTCTL_AIC_OFF                          \
     (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_AIC_SHIFT)
 
-#define PCI_EXP_SLTCTL_PIC_SHIFT        (ffs(PCI_EXP_SLTCTL_PIC) - 1)
+#define PCI_EXP_SLTCTL_PIC_SHIFT        ctz32(PCI_EXP_SLTCTL_PIC)
 #define PCI_EXP_SLTCTL_PIC_OFF                          \
     (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT)
 #define PCI_EXP_SLTCTL_PIC_ON                          \
@@ -109,7 +109,7 @@
 
 #define PCI_ERR_ROOT_IRQ_MAX            32
 #define PCI_ERR_ROOT_IRQ                0xf8000000
-#define PCI_ERR_ROOT_IRQ_SHIFT          (ffs(PCI_ERR_ROOT_IRQ) - 1)
+#define PCI_ERR_ROOT_IRQ_SHIFT          ctz32(PCI_ERR_ROOT_IRQ)
 #define PCI_ERR_ROOT_STATUS_REPORT_MASK (PCI_ERR_ROOT_COR_RCV |         \
                                          PCI_ERR_ROOT_MULTI_COR_RCV |   \
                                          PCI_ERR_ROOT_UNCOR_RCV |       \

+ 0 - 6
include/qapi/qmp/qerror.h

@@ -37,9 +37,6 @@ void qerror_report_err(Error *err);
 #define QERR_BASE_NOT_FOUND \
     ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
 
-#define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
-    ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
-
 #define QERR_BLOCK_JOB_NOT_READY \
     ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed"
 
@@ -58,9 +55,6 @@ void qerror_report_err(Error *err);
 #define QERR_DEVICE_IN_USE \
     ERROR_CLASS_GENERIC_ERROR, "Device '%s' is in use"
 
-#define QERR_DEVICE_IS_READ_ONLY \
-    ERROR_CLASS_GENERIC_ERROR, "Device '%s' is read only"
-
 #define QERR_DEVICE_NO_HOTPLUG \
     ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging"
 

+ 23 - 0
include/qemu/hbitmap.h

@@ -64,6 +64,29 @@ struct HBitmapIter {
  */
 HBitmap *hbitmap_alloc(uint64_t size, int granularity);
 
+/**
+ * hbitmap_truncate:
+ * @hb: The bitmap to change the size of.
+ * @size: The number of elements to change the bitmap to accommodate.
+ *
+ * truncate or grow an existing bitmap to accommodate a new number of elements.
+ * This may invalidate existing HBitmapIterators.
+ */
+void hbitmap_truncate(HBitmap *hb, uint64_t size);
+
+/**
+ * hbitmap_merge:
+ * @a: The bitmap to store the result in.
+ * @b: The bitmap to merge into @a.
+ * @return true if the merge was successful,
+ *         false if it was not attempted.
+ *
+ * Merge two bitmaps together.
+ * A := A (BITOR) B.
+ * B is left unmodified.
+ */
+bool hbitmap_merge(HBitmap *a, const HBitmap *b);
+
 /**
  * hbitmap_empty:
  * @hb: HBitmap to operate on.
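
A quick usage sketch for the two new prototypes above (a hypothetical caller, not part of the patch; hbitmap_set() and hbitmap_free() are the existing HBitmap API, and error handling is elided):

    /* Two bitmaps covering 1M elements at granularity 0. */
    HBitmap *a = hbitmap_alloc(1 << 20, 0);
    HBitmap *b = hbitmap_alloc(1 << 20, 0);

    hbitmap_set(b, 0, 4096);        /* dirty the first 4096 elements of b */

    if (hbitmap_merge(a, b)) {      /* a |= b; b is left unmodified */
        /* Grow a to 2M elements; per the documentation above, this may
         * invalidate any existing HBitmapIter over a. */
        hbitmap_truncate(a, 2 << 20);
    }

    hbitmap_free(a);
    hbitmap_free(b);

hbitmap_merge() returning false ("not attempted") presumably covers incompatible bitmaps, such as mismatched sizes, so callers should check the result rather than assume the OR happened.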

+ 6 - 2
include/standard-headers/linux/virtio_blk.h

@@ -58,7 +58,7 @@ struct virtio_blk_config {
 	uint32_t size_max;
 	/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
 	uint32_t seg_max;
-	/* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
+	/* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
 	struct virtio_blk_geometry {
 		uint16_t cylinders;
 		uint8_t heads;
@@ -117,7 +117,11 @@ struct virtio_blk_config {
 #define VIRTIO_BLK_T_BARRIER	0x80000000
 #endif /* !VIRTIO_BLK_NO_LEGACY */
 
-/* This is the first element of the read scatter-gather list. */
+/*
+ * This comes first in the read scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated,
+ * this is the first element of the read scatter-gather list.
+ */
 struct virtio_blk_outhdr {
 	/* VIRTIO_BLK_T* */
 	__virtio32 type;

+ 2 - 0
include/sysemu/block-backend.h

@@ -87,6 +87,8 @@ int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
                          int nb_sectors);
 int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
               int nb_sectors);
+int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
+                     int nb_sectors, BdrvRequestFlags flags);
 BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
                                  int nb_sectors, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque);

+ 0 - 3
include/sysemu/os-win32.h

@@ -72,9 +72,6 @@
 #define sigsetjmp(env, savemask) setjmp(env)
 #define siglongjmp(env, val) longjmp(env, val)
 
-/* Declaration of ffs() is missing in MinGW's strings.h. */
-int ffs(int i);
-
 /* Missing POSIX functions. Don't use MinGW-w64 macros. */
 #undef gmtime_r
 struct tm *gmtime_r(const time_t *timep, struct tm *result);

+ 2 - 9
iothread.c

@@ -31,21 +31,14 @@ typedef ObjectClass IOThreadClass;
 static void *iothread_run(void *opaque)
 {
     IOThread *iothread = opaque;
-    bool blocking;
 
     qemu_mutex_lock(&iothread->init_done_lock);
     iothread->thread_id = qemu_get_thread_id();
     qemu_cond_signal(&iothread->init_done_cond);
     qemu_mutex_unlock(&iothread->init_done_lock);
 
-    while (!iothread->stopping) {
-        aio_context_acquire(iothread->ctx);
-        blocking = true;
-        while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
-            /* Progress was made, keep going */
-            blocking = false;
-        }
-        aio_context_release(iothread->ctx);
+    while (!atomic_read(&iothread->stopping)) {
+        aio_poll(iothread->ctx, true);
     }
     return NULL;
 }

+ 4 - 4
kvm-all.c

@@ -1141,18 +1141,18 @@ static int kvm_irqchip_get_virq(KVMState *s)
 {
     uint32_t *word = s->used_gsi_bitmap;
     int max_words = ALIGN(s->gsi_count, 32) / 32;
-    int i, bit;
+    int i, zeroes;
     bool retry = true;
 
 again:
     /* Return the lowest unused GSI in the bitmap */
     for (i = 0; i < max_words; i++) {
-        bit = ffs(~word[i]);
-        if (!bit) {
+        zeroes = ctz32(~word[i]);
+        if (zeroes == 32) {
             continue;
         }
 
-        return bit - 1 + i * 32;
+        return zeroes + i * 32;
     }
     if (!s->direct_msi && retry) {
         retry = false;
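
This hunk also shows why ctz32() needs the explicit == 32 check that ffs() got for free: ffs(~word) returns 0 when word has no zero bits, while QEMU's ctz32() signals the same case by returning 32. A standalone sketch of the lowest-clear-bit scan (illustrative only, using the compiler builtin rather than QEMU's helper):

    #include <stdint.h>

    /* Return the index of the lowest clear bit across an array of
     * 32-bit words, or -1 if every bit is set. */
    static int lowest_clear_bit(const uint32_t *words, int nwords)
    {
        for (int i = 0; i < nwords; i++) {
            uint32_t inverted = ~words[i];
            if (inverted == 0) {
                continue;   /* word is full; ctz32() would return 32 here */
            }
            return __builtin_ctz(inverted) + i * 32;
        }
        return -1;
    }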

+ 4 - 5
migration/block.c

@@ -304,7 +304,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
 
-    bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
+    bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
     qemu_mutex_unlock_iothread();
 
     bmds->cur_sector = cur_sector + nr_sectors;
@@ -320,7 +320,7 @@ static int set_dirty_tracking(void)
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
         bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
-                                                      NULL);
+                                                      NULL, NULL);
         if (!bmds->dirty_bitmap) {
             ret = -errno;
             goto fail;
@@ -497,8 +497,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                 g_free(blk);
             }
 
-            bdrv_reset_dirty_bitmap(bmds->bs, bmds->dirty_bitmap, sector,
-                                    nr_sectors);
+            bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
             break;
         }
         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -584,7 +583,7 @@ static int64_t get_remaining_dirty(void)
     int64_t dirty = 0;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
+        dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
     }
 
     return dirty << BDRV_SECTOR_BITS;
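
Two API changes meet in this file: bdrv_reset_dirty_bitmap() and bdrv_get_dirty_count() no longer take the BlockDriverState (the bitmap itself is now enough), and bdrv_create_dirty_bitmap() grows an extra argument, which block migration leaves NULL to keep its bitmap anonymous. Judging by this series, the new parameter is the bitmap name, ahead of the usual Error pointer. A hedged sketch of a caller that does name its bitmap (the prototype is assumed from this hunk, not quoted from the header):

    /* Assumed signature: bdrv_create_dirty_bitmap(bs, granularity, name, errp).
     * Create a named bitmap with 64 KiB granularity. */
    Error *local_err = NULL;
    BdrvDirtyBitmap *bitmap;

    bitmap = bdrv_create_dirty_bitmap(bs, 64 * 1024, "bitmap0", &local_err);
    if (!bitmap) {
        qerror_report_err(local_err);   /* see qerror.h above */
    }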

+ 101 - 12
qapi/block-core.json

@@ -330,14 +330,19 @@
 #
 # Block dirty bitmap information.
 #
+# @name: #optional the name of the dirty bitmap (Since 2.4)
+#
 # @count: number of dirty bytes according to the dirty bitmap
 #
 # @granularity: granularity of the dirty bitmap in bytes (since 1.4)
 #
+# @frozen: whether the dirty bitmap is frozen (Since 2.4)
+#
 # Since: 1.3
 ##
 { 'type': 'BlockDirtyInfo',
-  'data': {'count': 'int', 'granularity': 'int'} }
+  'data': {'*name': 'str', 'count': 'int', 'granularity': 'uint32',
+           'frozen': 'bool'} }
 
 ##
 # @BlockInfo:
@@ -510,10 +515,12 @@
 #
 # @none: only copy data written from now on
 #
+# @dirty-bitmap: only copy data described by the dirty bitmap. Since: 2.4
+#
 # Since: 1.3
 ##
 { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
+  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
 
 ##
 # @BlockJobType:
@@ -688,14 +695,18 @@
 #          probe if @mode is 'existing', else the format of the source
 #
 # @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
+#        (all the disk, only the sectors allocated in the topmost image, from a
+#        dirty bitmap, or only new I/O).
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
 # @speed: #optional the maximum speed, in bytes per second
 #
+# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap".
+#          Must be present if sync is "dirty-bitmap", must NOT be present
+#          otherwise. (Since 2.4)
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #                   default 'report'.  'stop' and 'enospc' can only be used
 #                   if the block device supports io-status (see BlockInfo).
@@ -713,7 +724,7 @@
 { 'type': 'DriveBackup',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
+            '*speed': 'int', '*bitmap': 'str',
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
@@ -957,6 +968,76 @@
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
+##
+# @BlockDirtyBitmap
+#
+# @node: name of device/node which the bitmap is tracking
+#
+# @name: name of the dirty bitmap
+#
+# Since 2.4
+##
+{ 'type': 'BlockDirtyBitmap',
+  'data': { 'node': 'str', 'name': 'str' } }
+
+##
+# @BlockDirtyBitmapAdd
+#
+# @node: name of device/node which the bitmap is tracking
+#
+# @name: name of the dirty bitmap
+#
+# @granularity: #optional the bitmap granularity, default is 64k for
+#               block-dirty-bitmap-add
+#
+# Since 2.4
+##
+{ 'type': 'BlockDirtyBitmapAdd',
+  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32' } }
+
+##
+# @block-dirty-bitmap-add
+#
+# Create a dirty bitmap with a name on the node
+#
+# Returns: nothing on success
+#          If @node is not a valid block device or node, DeviceNotFound
+#          If @name is already taken, GenericError with an explanation
+#
+# Since 2.4
+##
+{ 'command': 'block-dirty-bitmap-add',
+  'data': 'BlockDirtyBitmapAdd' }
+
+##
+# @block-dirty-bitmap-remove
+#
+# Remove a dirty bitmap on the node
+#
+# Returns: nothing on success
+#          If @node is not a valid block device or node, DeviceNotFound
+#          If @name is not found, GenericError with an explanation
+#          if @name is frozen by an operation, GenericError
+#
+# Since 2.4
+##
+{ 'command': 'block-dirty-bitmap-remove',
+  'data': 'BlockDirtyBitmap' }
+
+##
+# @block-dirty-bitmap-clear
+#
+# Clear (reset) a dirty bitmap on the device
+#
+# Returns: nothing on success
+#          If @node is not a valid block device, DeviceNotFound
+#          If @name is not found, GenericError with an explanation
+#
+# Since 2.4
+##
+{ 'command': 'block-dirty-bitmap-clear',
+  'data': 'BlockDirtyBitmap' }
+
 ##
 # @block_set_io_throttle:
 #
@@ -1310,11 +1391,14 @@
 # Driver specific block device options for the null backend.
 #
 # @size:    #optional size of the device in bytes.
+# @latency-ns: #optional emulated latency (in nanoseconds) in processing
+#              requests. Default to zero which completes requests immediately.
+#              (Since 2.4)
 #
 # Since: 2.2
 ##
 { 'type': 'BlockdevOptionsNull',
-  'data': { '*size': 'int' } }
+  'data': { '*size': 'int', '*latency-ns': 'uint64' } }
 
 ##
 # @BlockdevOptionsVVFAT
@@ -1754,7 +1838,11 @@
 #
 # Emitted when a corruption has been detected in a disk image
 #
-# @device: device name
+# @device: device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @node-name: #optional node name (Since: 2.4)
 #
 # @msg: informative message for human consumption, such as the kind of
 #       corruption being detected. It should not be parsed by machine as it is
@@ -1773,11 +1861,12 @@
 # Since: 1.7
 ##
 { 'event': 'BLOCK_IMAGE_CORRUPTED',
-  'data': { 'device' : 'str',
-            'msg'    : 'str',
-            '*offset': 'int',
-            '*size'  : 'int',
-            'fatal'  : 'bool' } }
+  'data': { 'device'     : 'str',
+            '*node-name' : 'str',
+            'msg'        : 'str',
+            '*offset'    : 'int',
+            '*size'      : 'int',
+            'fatal'      : 'bool' } }
 
 ##
 # @BLOCK_IO_ERROR

+ 310 - 206
qemu-img.c

@@ -1305,20 +1305,312 @@ out3:
     return ret;
 }
 
+enum ImgConvertBlockStatus {
+    BLK_DATA,
+    BLK_ZERO,
+    BLK_BACKING_FILE,
+};
+
+typedef struct ImgConvertState {
+    BlockBackend **src;
+    int64_t *src_sectors;
+    int src_cur, src_num;
+    int64_t src_cur_offset;
+    int64_t total_sectors;
+    int64_t allocated_sectors;
+    enum ImgConvertBlockStatus status;
+    int64_t sector_next_status;
+    BlockBackend *target;
+    bool has_zero_init;
+    bool compressed;
+    bool target_has_backing;
+    int min_sparse;
+    size_t cluster_sectors;
+    size_t buf_sectors;
+} ImgConvertState;
+
+static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+{
+    assert(sector_num >= s->src_cur_offset);
+    while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
+        s->src_cur_offset += s->src_sectors[s->src_cur];
+        s->src_cur++;
+        assert(s->src_cur < s->src_num);
+    }
+}
+
+static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
+{
+    int64_t ret;
+    int n;
+
+    convert_select_part(s, sector_num);
+
+    assert(s->total_sectors > sector_num);
+    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+
+    if (s->sector_next_status <= sector_num) {
+        ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
+                                    sector_num - s->src_cur_offset,
+                                    n, &n);
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (ret & BDRV_BLOCK_ZERO) {
+            s->status = BLK_ZERO;
+        } else if (ret & BDRV_BLOCK_DATA) {
+            s->status = BLK_DATA;
+        } else if (!s->target_has_backing) {
+            /* Without a target backing file we must copy over the contents of
+             * the backing file as well. */
+            /* TODO Check block status of the backing file chain to avoid
+             * needlessly reading zeroes and limiting the iteration to the
+             * buffer size */
+            s->status = BLK_DATA;
+        } else {
+            s->status = BLK_BACKING_FILE;
+        }
+
+        s->sector_next_status = sector_num + n;
+    }
+
+    n = MIN(n, s->sector_next_status - sector_num);
+    if (s->status == BLK_DATA) {
+        n = MIN(n, s->buf_sectors);
+    }
+
+    /* We need to write complete clusters for compressed images, so if an
+     * unallocated area is shorter than that, we must consider the whole
+     * cluster allocated. */
+    if (s->compressed) {
+        if (n < s->cluster_sectors) {
+            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
+            s->status = BLK_DATA;
+        } else {
+            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
+        }
+    }
+
+    return n;
+}
+
+static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
+                        uint8_t *buf)
+{
+    int n;
+    int ret;
+
+    if (s->status == BLK_ZERO || s->status == BLK_BACKING_FILE) {
+        return 0;
+    }
+
+    assert(nb_sectors <= s->buf_sectors);
+    while (nb_sectors > 0) {
+        BlockBackend *blk;
+        int64_t bs_sectors;
+
+        /* In the case of compression with multiple source files, we can get a
+         * nb_sectors that spreads into the next part. So we must be able to
+         * read across multiple BDSes for one convert_read() call. */
+        convert_select_part(s, sector_num);
+        blk = s->src[s->src_cur];
+        bs_sectors = s->src_sectors[s->src_cur];
+
+        n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
+        ret = blk_read(blk, sector_num - s->src_cur_offset, buf, n);
+        if (ret < 0) {
+            return ret;
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+        buf += n * BDRV_SECTOR_SIZE;
+    }
+
+    return 0;
+}
+
+static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
+                         const uint8_t *buf)
+{
+    int ret;
+
+    while (nb_sectors > 0) {
+        int n = nb_sectors;
+
+        switch (s->status) {
+        case BLK_BACKING_FILE:
+            /* If we have a backing file, leave clusters unallocated that are
+             * unallocated in the source image, so that the backing file is
+             * visible at the respective offset. */
+            assert(s->target_has_backing);
+            break;
+
+        case BLK_DATA:
+            /* We must always write compressed clusters as a whole, so don't
+             * try to find zeroed parts in the buffer. We can only save the
+             * write if the buffer is completely zeroed and we're allowed to
+             * keep the target sparse. */
+            if (s->compressed) {
+                if (s->has_zero_init && s->min_sparse &&
+                    buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
+                {
+                    assert(!s->target_has_backing);
+                    break;
+                }
+
+                ret = blk_write_compressed(s->target, sector_num, buf, n);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+            }
+
+            /* If there is real non-zero data or we're told to keep the target
+             * fully allocated (-S 0), we must write it. Otherwise we can treat
+             * it as zero sectors. */
+            if (!s->min_sparse ||
+                is_allocated_sectors_min(buf, n, &n, s->min_sparse))
+            {
+                ret = blk_write(s->target, sector_num, buf, n);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+            }
+            /* fall-through */
+
+        case BLK_ZERO:
+            if (s->has_zero_init) {
+                break;
+            }
+            ret = blk_write_zeroes(s->target, sector_num, n, 0);
+            if (ret < 0) {
+                return ret;
+            }
+            break;
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+        buf += n * BDRV_SECTOR_SIZE;
+    }
+
+    return 0;
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+    uint8_t *buf = NULL;
+    int64_t sector_num, allocated_done;
+    int ret;
+    int n;
+
+    /* Check whether we have zero initialisation or can get it efficiently */
+    s->has_zero_init = s->min_sparse && !s->target_has_backing
+                     ? bdrv_has_zero_init(blk_bs(s->target))
+                     : false;
+
+    if (!s->has_zero_init && !s->target_has_backing &&
+        bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
+    {
+        ret = bdrv_make_zero(blk_bs(s->target), BDRV_REQ_MAY_UNMAP);
+        if (ret == 0) {
+            s->has_zero_init = true;
+        }
+    }
+
+    /* Allocate buffer for copied data. For compressed images, only one cluster
+     * can be copied at a time. */
+    if (s->compressed) {
+        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
+            error_report("invalid cluster size");
+            ret = -EINVAL;
+            goto fail;
+        }
+        s->buf_sectors = s->cluster_sectors;
+    }
+    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+    /* Calculate allocated sectors for progress */
+    s->allocated_sectors = 0;
+    sector_num = 0;
+    while (sector_num < s->total_sectors) {
+        n = convert_iteration_sectors(s, sector_num);
+        if (n < 0) {
+            ret = n;
+            goto fail;
+        }
+        if (s->status == BLK_DATA) {
+            s->allocated_sectors += n;
+        }
+        sector_num += n;
+    }
+
+    /* Do the copy */
+    s->src_cur = 0;
+    s->src_cur_offset = 0;
+    s->sector_next_status = 0;
+
+    sector_num = 0;
+    allocated_done = 0;
+
+    while (sector_num < s->total_sectors) {
+        n = convert_iteration_sectors(s, sector_num);
+        if (n < 0) {
+            ret = n;
+            goto fail;
+        }
+        if (s->status == BLK_DATA) {
+            allocated_done += n;
+            qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
+                                0);
+        }
+
+        ret = convert_read(s, sector_num, n, buf);
+        if (ret < 0) {
+            error_report("error while reading sector %" PRId64
+                         ": %s", sector_num, strerror(-ret));
+            goto fail;
+        }
+
+        ret = convert_write(s, sector_num, n, buf);
+        if (ret < 0) {
+            error_report("error while writing sector %" PRId64
+                         ": %s", sector_num, strerror(-ret));
+            goto fail;
+        }
+
+        sector_num += n;
+    }
+
+    if (s->compressed) {
+        /* signal EOF to align */
+        ret = blk_write_compressed(s->target, 0, NULL, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    ret = 0;
+fail:
+    qemu_vfree(buf);
+    return ret;
+}
+
 static int img_convert(int argc, char **argv)
 {
-    int c, n, n1, bs_n, bs_i, compress, cluster_sectors, skip_create;
+    int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
     int64_t ret = 0;
     int progress = 0, flags, src_flags;
     const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
     BlockBackend **blk = NULL, *out_blk = NULL;
     BlockDriverState **bs = NULL, *out_bs = NULL;
-    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
+    int64_t total_sectors;
     int64_t *bs_sectors = NULL;
-    uint8_t * buf = NULL;
     size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
-    const uint8_t *buf1;
     BlockDriverInfo bdi;
     QemuOpts *opts = NULL;
     QemuOptsList *create_opts = NULL;
@@ -1329,6 +1621,7 @@ static int img_convert(int argc, char **argv)
     bool quiet = false;
     Error *local_err = NULL;
     QemuOpts *sn_opts = NULL;
+    ImgConvertState state;
 
     fmt = NULL;
     out_fmt = "raw";
@@ -1627,9 +1920,6 @@ static int img_convert(int argc, char **argv)
     }
     out_bs = blk_bs(out_blk);
 
-    bs_i = 0;
-    bs_offset = 0;
-
     /* increase bufsectors from the default 4096 (2M) if opt_transfer_length
      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
      * as maximum. */
@@ -1638,8 +1928,6 @@ static int img_convert(int argc, char **argv)
                                          out_bs->bl.discard_alignment))
                     );
 
-    buf = blk_blockalign(out_blk, bufsectors * BDRV_SECTOR_SIZE);
-
     if (skip_create) {
         int64_t output_sectors = blk_nb_sectors(out_blk);
         if (output_sectors < 0) {
@@ -1666,203 +1954,20 @@ static int img_convert(int argc, char **argv)
         cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
     }
 
-    if (compress) {
-        if (cluster_sectors <= 0 || cluster_sectors > bufsectors) {
-            error_report("invalid cluster size");
-            ret = -1;
-            goto out;
-        }
-        sector_num = 0;
-
-        nb_sectors = total_sectors;
-
-        for(;;) {
-            int64_t bs_num;
-            int remainder;
-            uint8_t *buf2;
-
-            nb_sectors = total_sectors - sector_num;
-            if (nb_sectors <= 0)
-                break;
-            if (nb_sectors >= cluster_sectors)
-                n = cluster_sectors;
-            else
-                n = nb_sectors;
-
-            bs_num = sector_num - bs_offset;
-            assert (bs_num >= 0);
-            remainder = n;
-            buf2 = buf;
-            while (remainder > 0) {
-                int nlow;
-                while (bs_num == bs_sectors[bs_i]) {
-                    bs_offset += bs_sectors[bs_i];
-                    bs_i++;
-                    assert (bs_i < bs_n);
-                    bs_num = 0;
-                    /* printf("changing part: sector_num=%" PRId64 ", "
-                       "bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
-                       "\n", sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
-                }
-                assert (bs_num < bs_sectors[bs_i]);
-
-                nlow = remainder > bs_sectors[bs_i] - bs_num
-                    ? bs_sectors[bs_i] - bs_num : remainder;
-
-                ret = blk_read(blk[bs_i], bs_num, buf2, nlow);
-                if (ret < 0) {
-                    error_report("error while reading sector %" PRId64 ": %s",
-                                 bs_num, strerror(-ret));
-                    goto out;
-                }
-
-                buf2 += nlow * 512;
-                bs_num += nlow;
-
-                remainder -= nlow;
-            }
-            assert (remainder == 0);
-
-            if (!buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)) {
-                ret = blk_write_compressed(out_blk, sector_num, buf, n);
-                if (ret != 0) {
-                    error_report("error while compressing sector %" PRId64
-                                 ": %s", sector_num, strerror(-ret));
-                    goto out;
-                }
-            }
-            sector_num += n;
-            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
-        }
-        /* signal EOF to align */
-        blk_write_compressed(out_blk, 0, NULL, 0);
-    } else {
-        int64_t sectors_to_read, sectors_read, sector_num_next_status;
-        bool count_allocated_sectors;
-        int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
-
-        if (!has_zero_init && bdrv_can_write_zeroes_with_unmap(out_bs)) {
-            ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP);
-            if (ret < 0) {
-                goto out;
-            }
-            has_zero_init = 1;
-        }
-
-        sectors_to_read = total_sectors;
-        count_allocated_sectors = progress && (out_baseimg || has_zero_init);
-restart:
-        sector_num = 0; // total number of sectors converted so far
-        sectors_read = 0;
-        sector_num_next_status = 0;
-
-        for(;;) {
-            nb_sectors = total_sectors - sector_num;
-            if (nb_sectors <= 0) {
-                if (count_allocated_sectors) {
-                    sectors_to_read = sectors_read;
-                    count_allocated_sectors = false;
-                    goto restart;
-                }
-                ret = 0;
-                break;
-            }
-
-            while (sector_num - bs_offset >= bs_sectors[bs_i]) {
-                bs_offset += bs_sectors[bs_i];
-                bs_i ++;
-                assert (bs_i < bs_n);
-                /* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
-                  "bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
-                   sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
-            }
-
-            if ((out_baseimg || has_zero_init) &&
-                sector_num >= sector_num_next_status) {
-                n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
-                ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
-                                            n, &n1);
-                if (ret < 0) {
-                    error_report("error while reading block status of sector %"
-                                 PRId64 ": %s", sector_num - bs_offset,
-                                 strerror(-ret));
-                    goto out;
-                }
-                /* If the output image is zero initialized, we are not working
-                 * on a shared base and the input is zero we can skip the next
-                 * n1 sectors */
-                if (has_zero_init && !out_baseimg && (ret & BDRV_BLOCK_ZERO)) {
-                    sector_num += n1;
-                    continue;
-                }
-                /* If the output image is being created as a copy on write
-                 * image, assume that sectors which are unallocated in the
-                 * input image are present in both the output's and input's
-                 * base images (no need to copy them). */
-                if (out_baseimg) {
-                    if (!(ret & BDRV_BLOCK_DATA)) {
-                        sector_num += n1;
-                        continue;
-                    }
-                    /* The next 'n1' sectors are allocated in the input image.
-                     * Copy only those as they may be followed by unallocated
-                     * sectors. */
-                    nb_sectors = n1;
-                }
-                /* avoid redundant callouts to get_block_status */
-                sector_num_next_status = sector_num + n1;
-            }
-
-            n = MIN(nb_sectors, bufsectors);
-
-            /* round down request length to an aligned sector, but
-             * do not bother doing this on short requests. They happen
-             * when we found an all-zero area, and the next sector to
-             * write will not be sector_num + n. */
-            if (cluster_sectors > 0 && n >= cluster_sectors) {
-                int64_t next_aligned_sector = (sector_num + n);
-                next_aligned_sector -= next_aligned_sector % cluster_sectors;
-                if (sector_num + n > next_aligned_sector) {
-                    n = next_aligned_sector - sector_num;
-                }
-            }
-
-            n = MIN(n, bs_sectors[bs_i] - (sector_num - bs_offset));
-
-            sectors_read += n;
-            if (count_allocated_sectors) {
-                sector_num += n;
-                continue;
-            }
+    state = (ImgConvertState) {
+        .src                = blk,
+        .src_sectors        = bs_sectors,
+        .src_num            = bs_n,
+        .total_sectors      = total_sectors,
+        .target             = out_blk,
+        .compressed         = compress,
+        .target_has_backing = (bool) out_baseimg,
+        .min_sparse         = min_sparse,
+        .cluster_sectors    = cluster_sectors,
+        .buf_sectors        = bufsectors,
+    };
+    ret = convert_do_copy(&state);
 
-            n1 = n;
-            ret = blk_read(blk[bs_i], sector_num - bs_offset, buf, n);
-            if (ret < 0) {
-                error_report("error while reading sector %" PRId64 ": %s",
-                             sector_num - bs_offset, strerror(-ret));
-                goto out;
-            }
-            /* NOTE: at the same time we convert, we do not write zero
-               sectors to have a chance to compress the image. Ideally, we
-               should add a specific call to have the info to go faster */
-            buf1 = buf;
-            while (n > 0) {
-                if (!has_zero_init ||
-                    is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
-                    ret = blk_write(out_blk, sector_num, buf1, n1);
-                    if (ret < 0) {
-                        error_report("error while writing sector %" PRId64
-                                     ": %s", sector_num, strerror(-ret));
-                        goto out;
-                    }
-                }
-                sector_num += n1;
-                n -= n1;
-                buf1 += n1 * 512;
-            }
-            qemu_progress_print(100.0 * sectors_read / sectors_to_read, 0);
-        }
-    }
 out:
     if (!ret) {
         qemu_progress_print(100, 0);
@@ -1870,7 +1975,6 @@ out:
     qemu_progress_end();
     qemu_opts_del(opts);
     qemu_opts_free(create_opts);
-    qemu_vfree(buf);
     qemu_opts_del(sn_opts);
     blk_unref(out_blk);
     g_free(bs);

+ 127 - 3
qmp-commands.hx

@@ -1007,6 +1007,43 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_stream,
     },
 
+SQMP
+block-stream
+------------
+
+Copy data from a backing file into a block device.
+
+Arguments:
+
+- "device": The device's ID, must be unique (json-string)
+- "base": The file name of the backing image above which copying starts
+          (json-string, optional)
+- "backing-file": The backing file string to write into the active layer. This
+                  filename is not validated.
+
+                  If a pathname string is such that it cannot be resolved by
+                  QEMU, that means that subsequent QMP or HMP commands must use
+                  node-names for the image in question, as filename lookup
+                  methods will fail.
+
+                  If not specified, QEMU will automatically determine the
+                  backing file string to use, or error out if there is no
+                  obvious choice.  Care should be taken when specifying the
+                  string, to specify a valid filename or protocol.
+                  (json-string, optional) (Since 2.1)
+- "speed":  the maximum speed, in bytes per second (json-int, optional)
+- "on-error": the action to take on an error (default 'report').  'stop' and
+              'enospc' can only be used if the block device supports io-status.
+              (json-string, optional) (Since 2.1)
+
+Example:
+
+-> { "execute": "block-stream", "arguments": { "device": "virtio0",
+                                               "base": "/tmp/master.qcow2" } }
+<- { "return": {} }
+
+EQMP
+
     {
         .name       = "block-commit",
         .args_type  = "device:B,base:s?,top:s?,backing-file:s?,speed:o?",
@@ -1073,7 +1110,7 @@ EQMP
     {
         .name       = "drive-backup",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "on-source-error:s?,on-target-error:s?",
+                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_drive_backup,
     },
 
@@ -1100,8 +1137,10 @@ Arguments:
             (json-string, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
-  allocated in the topmost image, or "none" to only replicate new I/O
-  (MirrorSyncMode).
+  allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in
+  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
+- "bitmap": dirty bitmap name for sync==dirty-bitmap. Must be present if sync
+            is "dirty-bitmap", must NOT be present otherwise.
 - "mode": whether and how QEMU should create a new image
           (NewImageMode, optional, default 'absolute-paths')
 - "speed": the maximum speed, in bytes per second (json-int, optional)
@@ -1266,6 +1305,91 @@ Example:
                                          "name": "snapshot0" } } ] } }
 <- { "return": {} }
 
+EQMP
+
+    {
+        .name       = "block-dirty-bitmap-add",
+        .args_type  = "node:B,name:s,granularity:i?",
+        .mhandler.cmd_new = qmp_marshal_input_block_dirty_bitmap_add,
+    },
+
+SQMP
+
+block-dirty-bitmap-add
+----------------------
+Since 2.4
+
+Create a dirty bitmap with a name on the device, and start tracking the writes.
+
+Arguments:
+
+- "node": device/node on which to create dirty bitmap (json-string)
+- "name": name of the new dirty bitmap (json-string)
+- "granularity": granularity to track writes with (int, optional)
+
+Example:
+
+-> { "execute": "block-dirty-bitmap-add", "arguments": { "node": "drive0",
+                                                   "name": "bitmap0" } }
+<- { "return": {} }
+
+EQMP
+
+    {
+        .name       = "block-dirty-bitmap-remove",
+        .args_type  = "node:B,name:s",
+        .mhandler.cmd_new = qmp_marshal_input_block_dirty_bitmap_remove,
+    },
+
+SQMP
+
+block-dirty-bitmap-remove
+-------------------------
+Since 2.4
+
+Stop write tracking and remove the dirty bitmap that was created with
+block-dirty-bitmap-add.
+
+Arguments:
+
+- "node": device/node on which to remove dirty bitmap (json-string)
+- "name": name of the dirty bitmap to remove (json-string)
+
+Example:
+
+-> { "execute": "block-dirty-bitmap-remove", "arguments": { "node": "drive0",
+                                                      "name": "bitmap0" } }
+<- { "return": {} }
+
+EQMP
+
+    {
+        .name       = "block-dirty-bitmap-clear",
+        .args_type  = "node:B,name:s",
+        .mhandler.cmd_new = qmp_marshal_input_block_dirty_bitmap_clear,
+    },
+
+SQMP
+
+block-dirty-bitmap-clear
+------------------------
+Since 2.4
+
+Reset the dirty bitmap associated with a node so that an incremental backup
+from this point in time forward will only backup clusters modified after this
+clear operation.
+
+Arguments:
+
+- "node": device/node on which to remove dirty bitmap (json-string)
+- "name": name of the dirty bitmap to remove (json-string)
+
+Example:
+
+-> { "execute": "block-dirty-bitmap-clear", "arguments": { "node": "drive0",
+                                                           "name": "bitmap0" } }
+<- { "return": {} }
+
 EQMP
 
     {

+ 11 - 0
scripts/checkpatch.pl

@@ -2911,6 +2911,17 @@ sub process {
 		if ($rawline =~ /\b(?:Qemu|QEmu)\b/) {
 			WARN("use QEMU instead of Qemu or QEmu\n" . $herecurr);
 		}
+
+# check for non-portable ffs() calls that have portable alternatives in QEMU
+		if ($line =~ /\bffs\(/) {
+			ERROR("use ctz32() instead of ffs()\n" . $herecurr);
+		}
+		if ($line =~ /\bffsl\(/) {
+			ERROR("use ctz32() or ctz64() instead of ffsl()\n" . $herecurr);
+		}
+		if ($line =~ /\bffsll\(/) {
+			ERROR("use ctz64() instead of ffsll()\n" . $herecurr);
+		}
 	}
 
 	# If we have no input at all, then there is nothing to report on

+ 75 - 0
scripts/qemu-gdb.py

@@ -22,12 +22,86 @@ def isnull(ptr):
 def int128(p):
     return long(p['lo']) + (long(p['hi']) << 64)
 
+def get_fs_base():
+    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS)'''
+    # %rsp - 120 is scratch space according to the SystemV ABI
+    old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
+    gdb.execute('call arch_prctl(0x1003, $rsp - 120)', False, True)
+    fs_base = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
+    gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True)
+    return fs_base
+
+def get_glibc_pointer_guard():
+    '''Fetch glibc pointer guard value'''
+    fs_base = get_fs_base()
+    return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base)
+
+def glibc_ptr_demangle(val, pointer_guard):
+    '''Undo effect of glibc's PTR_MANGLE()'''
+    return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard))
+
+def bt_jmpbuf(jmpbuf):
+    '''Backtrace a jmpbuf'''
+    JB_RBX  = 0
+    JB_RBP  = 1
+    JB_R12  = 2
+    JB_R13  = 3
+    JB_R14  = 4
+    JB_R15  = 5
+    JB_RSP  = 6
+    JB_PC   = 7
+
+    old_rbx = gdb.parse_and_eval('(uint64_t)$rbx')
+    old_rbp = gdb.parse_and_eval('(uint64_t)$rbp')
+    old_rsp = gdb.parse_and_eval('(uint64_t)$rsp')
+    old_r12 = gdb.parse_and_eval('(uint64_t)$r12')
+    old_r13 = gdb.parse_and_eval('(uint64_t)$r13')
+    old_r14 = gdb.parse_and_eval('(uint64_t)$r14')
+    old_r15 = gdb.parse_and_eval('(uint64_t)$r15')
+    old_rip = gdb.parse_and_eval('(uint64_t)$rip')
+
+    pointer_guard = get_glibc_pointer_guard()
+    gdb.execute('set $rbx = %s' % jmpbuf[JB_RBX])
+    gdb.execute('set $rbp = %s' % glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard))
+    gdb.execute('set $rsp = %s' % glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard))
+    gdb.execute('set $r12 = %s' % jmpbuf[JB_R12])
+    gdb.execute('set $r13 = %s' % jmpbuf[JB_R13])
+    gdb.execute('set $r14 = %s' % jmpbuf[JB_R14])
+    gdb.execute('set $r15 = %s' % jmpbuf[JB_R15])
+    gdb.execute('set $rip = %s' % glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard))
+
+    gdb.execute('bt')
+
+    gdb.execute('set $rbx = %s' % old_rbx)
+    gdb.execute('set $rbp = %s' % old_rbp)
+    gdb.execute('set $rsp = %s' % old_rsp)
+    gdb.execute('set $r12 = %s' % old_r12)
+    gdb.execute('set $r13 = %s' % old_r13)
+    gdb.execute('set $r14 = %s' % old_r14)
+    gdb.execute('set $r15 = %s' % old_r15)
+    gdb.execute('set $rip = %s' % old_rip)
+
 class QemuCommand(gdb.Command):
     '''Prefix for QEMU debug support commands'''
     def __init__(self):
         gdb.Command.__init__(self, 'qemu', gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE, True)
 
+class CoroutineCommand(gdb.Command):
+    '''Display coroutine backtrace'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu coroutine', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        argv = gdb.string_to_argv(arg)
+        if len(argv) != 1:
+            gdb.write('usage: qemu coroutine <coroutine-pointer>\n')
+            return
+
+        coroutine_pointer = gdb.parse_and_eval(argv[0]).cast(gdb.lookup_type('CoroutineUContext').pointer())
+        bt_jmpbuf(coroutine_pointer['env']['__jmpbuf'])
+
 class MtreeCommand(gdb.Command):
     '''Display the memory tree hierarchy'''
     def __init__(self):
@@ -86,4 +160,5 @@ def print_item(self, ptr, offset = gdb.Value(0), level = 0):
             subregion = subregion['subregions_link']['tqe_next']
 
 QemuCommand()
+CoroutineCommand()
 MtreeCommand()

+ 65 - 30
scripts/qmp/qmp.py

@@ -21,6 +21,9 @@ class QMPConnectError(QMPError):
 class QMPCapabilitiesError(QMPError):
     pass
 
+class QMPTimeoutError(QMPError):
+    pass
+
 class QEMUMonitorProtocol:
     def __init__(self, address, server=False):
         """
@@ -72,6 +75,44 @@ def __json_read(self, only_event=False):
 
     error = socket.error
 
+    def __get_events(self, wait=False):
+        """
+        Check for new events in the stream and cache them in __events.
+
+        @param wait (bool): block until an event is available.
+        @param wait (float): If wait is a float, treat it as a timeout value.
+
+        @raise QMPTimeoutError: If a timeout float is provided and the timeout
+                                period elapses.
+        @raise QMPConnectError: If wait is True but no events could be retrieved
+                                or if some other error occurred.
+        """
+
+        # Check for new events regardless and pull them into the cache:
+        self.__sock.setblocking(0)
+        try:
+            self.__json_read()
+        except socket.error, err:
+            if err[0] == errno.EAGAIN:
+                # No data available
+                pass
+        self.__sock.setblocking(1)
+
+        # Wait for new events, if needed.
+        # if wait is 0.0, this means "no wait" and is also implicitly false.
+        if not self.__events and wait:
+            if isinstance(wait, float):
+                self.__sock.settimeout(wait)
+            try:
+                ret = self.__json_read(only_event=True)
+            except socket.timeout:
+                raise QMPTimeoutError("Timeout waiting for event")
+            except:
+                raise QMPConnectError("Error while reading from socket")
+            if ret is None:
+                raise QMPConnectError("Error while reading from socket")
+            self.__sock.settimeout(None)
+
     def connect(self, negotiate=True):
         """
         Connect to the QMP Monitor and perform capabilities negotiation.
@@ -140,43 +181,37 @@ def pull_event(self, wait=False):
         """
         Get and delete the first available QMP event.
 
-        @param wait: block until an event is available (bool)
+        @param wait (bool): block until an event is available.
+        @param wait (float): If wait is a float, treat it as a timeout value.
+
+        @raise QMPTimeoutError: If a timeout float is provided and the timeout
+                                period elapses.
+        @raise QMPConnectError: If wait is True but no events could be retrieved
+                                or if some other error occurred.
+
+        @return The first available QMP event, or None.
         """
-        self.__sock.setblocking(0)
-        try:
-            self.__json_read()
-        except socket.error, err:
-            if err[0] == errno.EAGAIN:
-                # No data available
-                pass
-        self.__sock.setblocking(1)
-        if not self.__events and wait:
-            self.__json_read(only_event=True)
-        event = self.__events[0]
-        del self.__events[0]
-        return event
+        self.__get_events(wait)
+
+        if self.__events:
+            return self.__events.pop(0)
+        return None
 
     def get_events(self, wait=False):
         """
         Get a list of available QMP events.
 
-        @param wait: block until an event is available (bool)
-        """
-        self.__sock.setblocking(0)
-        try:
-            self.__json_read()
-        except socket.error, err:
-            if err[0] == errno.EAGAIN:
-                # No data available
-                pass
-        self.__sock.setblocking(1)
-        if not self.__events and wait:
-            ret = self.__json_read(only_event=True)
-            if ret == None:
-                # We are in blocking mode, if don't get anything, something
-                # went wrong
-                raise QMPConnectError("Error while reading from socket")
+        @param wait (bool): block until an event is available.
+        @param wait (float): If wait is a float, treat it as a timeout value.
 
+        @raise QMPTimeoutError: If a timeout float is provided and the timeout
+                                period elapses.
+        @raise QMPConnectError: If wait is True but no events could be retrieved
+                                or if some other error occurred.
+
+        @return The list of available QMP events.
+        """
+        self.__get_events(wait)
         return self.__events
 
     def clear_events(self):

+ 2 - 2
target-ppc/cpu.h

@@ -2251,8 +2251,8 @@ static inline ppcmas_tlb_t *booke206_get_tlbm(CPUPPCState *env, const int tlbn,
 {
 {
     int r;
     int r;
     uint32_t ways = booke206_tlb_ways(env, tlbn);
     uint32_t ways = booke206_tlb_ways(env, tlbn);
-    int ways_bits = ffs(ways) - 1;
-    int tlb_bits = ffs(booke206_tlb_size(env, tlbn)) - 1;
+    int ways_bits = ctz32(ways);
+    int tlb_bits = ctz32(booke206_tlb_size(env, tlbn));
     int i;
 
     way &= ways - 1;
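Note: the ffs()/ctz32() conversion above is behavior-preserving here because the TLB way and size values are nonzero powers of two, and for any nonzero x, ffs(x) - 1 equals the trailing-zero count. The two only diverge at x == 0, where ffs(x) - 1 yields -1 while ctz32(0) is defined as 32. A quick illustration of the identity (Python sketch, not QEMU code):

def ctz32(x):
    # Trailing-zero count of a 32-bit value; 32 for x == 0 by convention.
    if x == 0:
        return 32
    n = 0
    while (x >> n) & 1 == 0:
        n += 1
    return n

assert ctz32(8) == 3  # e.g. ways == 8 gives ways_bits == 3
assert all(ctz32(x) == (x & -x).bit_length() - 1 for x in range(1, 1 << 16))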

+ 223 - 0
tests/qemu-iotests/122

@@ -0,0 +1,223 @@
+#!/bin/bash
+#
+# Test some qemu-img convert cases
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    rm -f "$TEST_IMG".[123]
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+
+TEST_IMG="$TEST_IMG".base _make_test_img 64M
+$QEMU_IO -c "write -P 0x11 0 64M" "$TEST_IMG".base 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo
+echo "=== Check allocation status regression with -B ==="
+echo
+
+_make_test_img -b "$TEST_IMG".base
+$QEMU_IO -c "write -P 0x22 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IMG map "$TEST_IMG".orig | _filter_qemu_img_map
+
+
+echo
+echo "=== Check that zero clusters are kept in overlay ==="
+echo
+
+_make_test_img -b "$TEST_IMG".base
+
+$QEMU_IO -c "write -P 0 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IO -c "write -z 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo
+echo "=== Concatenate multiple source images ==="
+echo
+
+TEST_IMG="$TEST_IMG".1 _make_test_img 4M
+TEST_IMG="$TEST_IMG".2 _make_test_img 4M
+TEST_IMG="$TEST_IMG".3 _make_test_img 4M
+
+$QEMU_IO -c "write -P 0x11 0 64k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x22 0 64k" "$TEST_IMG".2 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x33 0 64k" "$TEST_IMG".3 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IMG convert -O $IMGFMT "$TEST_IMG".[123] "$TEST_IMG"
+$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22 4M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x33 8M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IMG convert -c -O $IMGFMT "$TEST_IMG".[123] "$TEST_IMG"
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22 4M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x33 8M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+# -B can't be combined with concatenation
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG".[123] "$TEST_IMG"
+$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base "$TEST_IMG".[123] "$TEST_IMG"
+
+
+echo
+echo "=== Compression with misaligned allocations and image sizes ==="
+echo
+
+TEST_IMG="$TEST_IMG".1 _make_test_img 1023k -o cluster_size=1024
+TEST_IMG="$TEST_IMG".2 _make_test_img 1023k -o cluster_size=1024
+
+$QEMU_IO -c "write -P 0x11   16k  16k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x22  130k 130k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x33 1022k   1k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x44    0k   1k" "$TEST_IMG".2 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IMG convert -c -O $IMGFMT "$TEST_IMG".[12] "$TEST_IMG"
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c "read -P 0       0k   16k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11   16k   16k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0      32k   98k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22  130k  130k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0     260k  762k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x33 1022k    1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x44 1023k    1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0    1024k 1022k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo
+echo "=== Full allocation with -S 0 ==="
+echo
+
+# Standalone image
+_make_test_img 64M
+$QEMU_IO -c "write -P 0x22 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0 3M 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo convert -S 0:
+$QEMU_IMG convert -O $IMGFMT -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 3M 61M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+echo
+echo convert -c -S 0:
+$QEMU_IMG convert -O $IMGFMT -c -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 3M 61M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+# With backing file
+TEST_IMG="$TEST_IMG".base _make_test_img 64M
+$QEMU_IO -c "write -P 0x11 0 32M" "$TEST_IMG".base 2>&1 | _filter_qemu_io | _filter_testdir
+
+_make_test_img -b "$TEST_IMG".base 64M
+$QEMU_IO -c "write -P 0x22 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo convert -S 0 with source backing file:
+$QEMU_IMG convert -O $IMGFMT -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+echo
+echo convert -c -S 0 with source backing file:
+$QEMU_IMG convert -O $IMGFMT -c -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+# With keeping the backing file
+echo
+echo convert -S 0 -B ...
+$QEMU_IMG convert -O $IMGFMT -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+echo
+echo convert -c -S 0 -B ...
+$QEMU_IMG convert -O $IMGFMT -c -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+
+echo
+echo "=== Non-zero -S ==="
+echo
+
+_make_test_img 64M -o cluster_size=1k
+$QEMU_IO -c "write -P 0 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write 0 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write 8k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write 17k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+for min_sparse in 4k 8k; do
+    echo
+    echo convert -S $min_sparse
+    $QEMU_IMG convert -O $IMGFMT -o cluster_size=1k -S $min_sparse "$TEST_IMG" "$TEST_IMG".orig
+    $QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+    echo
+    echo convert -c -S $min_sparse
+    # For compressed images, -S values other than 0 are ignored
+    $QEMU_IMG convert -O $IMGFMT -o cluster_size=1k -c -S $min_sparse "$TEST_IMG" "$TEST_IMG".orig
+    $QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+done
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0

+ 209 - 0
tests/qemu-iotests/122.out

@@ -0,0 +1,209 @@
+QA output created by 122
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+wrote 67108864/67108864 bytes at offset 0
+64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Check allocation status regression with -B ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0               0x300000        TEST_DIR/t.IMGFMT.orig
+0x300000        0x3d00000       TEST_DIR/t.IMGFMT.base
+
+=== Check that zero clusters are kept in overlay ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Concatenate multiple source images ===
+
+Formatting 'TEST_DIR/t.IMGFMT.1', fmt=IMGFMT size=4194304
+Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=4194304
+Formatting 'TEST_DIR/t.IMGFMT.3', fmt=IMGFMT size=4194304
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0               0x10000         TEST_DIR/t.IMGFMT
+0x400000        0x10000         TEST_DIR/t.IMGFMT
+0x800000        0x10000         TEST_DIR/t.IMGFMT
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 4194304
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 8388608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 65536, "length": 4128768, "depth": 0, "zero": true, "data": false},
+{ "start": 4194304, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 4259840, "length": 4128768, "depth": 0, "zero": true, "data": false},
+{ "start": 8388608, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 8454144, "length": 4128768, "depth": 0, "zero": true, "data": false}]
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 4194304
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 8388608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: -B makes no sense when concatenating multiple input images
+qemu-img: -B makes no sense when concatenating multiple input images
+
+=== Compression with misaligned allocations and image sizes ===
+
+Formatting 'TEST_DIR/t.IMGFMT.1', fmt=IMGFMT size=1047552
+Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=1047552
+wrote 16384/16384 bytes at offset 16384
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 133120/133120 bytes at offset 133120
+130 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 1046528
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 65536, "length": 65536, "depth": 0, "zero": true, "data": false},
+{ "start": 131072, "length": 196608, "depth": 0, "zero": false, "data": true},
+{ "start": 327680, "length": 655360, "depth": 0, "zero": true, "data": false},
+{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 1048576, "length": 1046528, "depth": 0, "zero": true, "data": false}]
+read 16384/16384 bytes at offset 0
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 16384/16384 bytes at offset 16384
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 100352/100352 bytes at offset 32768
+98 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 133120/133120 bytes at offset 133120
+130 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 780288/780288 bytes at offset 266240
+762 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 1046528
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 1047552
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1046528/1046528 bytes at offset 1048576
+1022 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Full allocation with -S 0 ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 3145728/3145728 bytes at offset 3145728
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+convert -S 0:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 63963136/63963136 bytes at offset 3145728
+61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 6291456, "depth": 0, "zero": false, "data": true, "offset": 327680},
+{ "start": 6291456, "length": 60817408, "depth": 0, "zero": true, "data": false}]
+
+convert -c -S 0:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 63963136/63963136 bytes at offset 3145728
+61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 6291456, "depth": 0, "zero": false, "data": true},
+{ "start": 6291456, "length": 60817408, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+wrote 33554432/33554432 bytes at offset 0
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+convert -S 0 with source backing file:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+
+convert -c -S 0 with source backing file:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true}]
+
+convert -S 0 -B ...
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+
+convert -c -S 0 -B ...
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true}]
+
+=== Non-zero -S ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 8192
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 17408
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+convert -S 4k
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 8192},
+{ "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 9216},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 10240},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+
+convert -c -S 4k
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+
+convert -S 8k
+[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": 8192},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 17408},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+
+convert -c -S 8k
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+*** done

+ 363 - 0
tests/qemu-iotests/124

@@ -0,0 +1,363 @@
+#!/usr/bin/env python
+#
+# Tests for incremental drive-backup
+#
+# Copyright (C) 2015 John Snow for Red Hat, Inc.
+#
+# Based on 056.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+
+
+def io_write_patterns(img, patterns):
+    for pattern in patterns:
+        iotests.qemu_io('-c', 'write -P%s %s %s' % pattern, img)
+
+
+def try_remove(img):
+    try:
+        os.remove(img)
+    except OSError:
+        pass
+
+
+class Bitmap:
+    def __init__(self, name, drive):
+        self.name = name
+        self.drive = drive
+        self.num = 0
+        self.backups = list()
+
+    def base_target(self):
+        return (self.drive['backup'], None)
+
+    def new_target(self, num=None):
+        if num is None:
+            num = self.num
+        self.num = num + 1
+        base = os.path.join(iotests.test_dir,
+                            "%s.%s." % (self.drive['id'], self.name))
+        suff = "%i.%s" % (num, self.drive['fmt'])
+        target = base + "inc" + suff
+        reference = base + "ref" + suff
+        self.backups.append((target, reference))
+        return (target, reference)
+
+    def last_target(self):
+        if self.backups:
+            return self.backups[-1]
+        return self.base_target()
+
+    def del_target(self):
+        for image in self.backups.pop():
+            try_remove(image)
+        self.num -= 1
+
+    def cleanup(self):
+        for backup in self.backups:
+            for image in backup:
+                try_remove(image)
+
+
+class TestIncrementalBackup(iotests.QMPTestCase):
+    def setUp(self):
+        self.bitmaps = list()
+        self.files = list()
+        self.drives = list()
+        self.vm = iotests.VM()
+        self.err_img = os.path.join(iotests.test_dir, 'err.%s' % iotests.imgfmt)
+
+        # Create a base image with a distinctive patterning
+        drive0 = self.add_node('drive0')
+        self.img_create(drive0['file'], drive0['fmt'])
+        self.vm.add_drive(drive0['file'])
+        io_write_patterns(drive0['file'], (('0x41', 0, 512),
+                                           ('0xd5', '1M', '32k'),
+                                           ('0xdc', '32M', '124k')))
+        self.vm.launch()
+
+
+    def add_node(self, node_id, fmt=iotests.imgfmt, path=None, backup=None):
+        if path is None:
+            path = os.path.join(iotests.test_dir, '%s.%s' % (node_id, fmt))
+        if backup is None:
+            backup = os.path.join(iotests.test_dir,
+                                  '%s.full.backup.%s' % (node_id, fmt))
+
+        self.drives.append({
+            'id': node_id,
+            'file': path,
+            'backup': backup,
+            'fmt': fmt })
+        return self.drives[-1]
+
+
+    def img_create(self, img, fmt=iotests.imgfmt, size='64M',
+                   parent=None, parentFormat=None):
+        if parent:
+            if parentFormat is None:
+                parentFormat = fmt
+            iotests.qemu_img('create', '-f', fmt, img, size,
+                             '-b', parent, '-F', parentFormat)
+        else:
+            iotests.qemu_img('create', '-f', fmt, img, size)
+        self.files.append(img)
+
+
+    def do_qmp_backup(self, error='Input/output error', **kwargs):
+        res = self.vm.qmp('drive-backup', **kwargs)
+        self.assert_qmp(res, 'return', {})
+
+        event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED",
+                                   match={'data': {'device': kwargs['device']}})
+        self.assertIsNotNone(event)
+
+        try:
+            failure = self.dictpath(event, 'data/error')
+        except AssertionError:
+            # Backup succeeded.
+            self.assert_qmp(event, 'data/offset', event['data']['len'])
+            return True
+        else:
+            # Backup failed.
+            self.assert_qmp(event, 'data/error', error)
+            return False
+
+
+    def create_anchor_backup(self, drive=None):
+        if drive is None:
+            drive = self.drives[-1]
+        res = self.do_qmp_backup(device=drive['id'], sync='full',
+                                 format=drive['fmt'], target=drive['backup'])
+        self.assertTrue(res)
+        self.files.append(drive['backup'])
+        return drive['backup']
+
+
+    def make_reference_backup(self, bitmap=None):
+        if bitmap is None:
+            bitmap = self.bitmaps[-1]
+        _, reference = bitmap.last_target()
+        res = self.do_qmp_backup(device=bitmap.drive['id'], sync='full',
+                                 format=bitmap.drive['fmt'], target=reference)
+        self.assertTrue(res)
+
+
+    def add_bitmap(self, name, drive, **kwargs):
+        bitmap = Bitmap(name, drive)
+        self.bitmaps.append(bitmap)
+        result = self.vm.qmp('block-dirty-bitmap-add', node=drive['id'],
+                             name=bitmap.name, **kwargs)
+        self.assert_qmp(result, 'return', {})
+        return bitmap
+
+
+    def prepare_backup(self, bitmap=None, parent=None):
+        if bitmap is None:
+            bitmap = self.bitmaps[-1]
+        if parent is None:
+            parent, _ = bitmap.last_target()
+
+        target, _ = bitmap.new_target()
+        self.img_create(target, bitmap.drive['fmt'], parent=parent)
+        return target
+
+
+    def create_incremental(self, bitmap=None, parent=None,
+                           parentFormat=None, validate=True):
+        if bitmap is None:
+            bitmap = self.bitmaps[-1]
+        if parent is None:
+            parent, _ = bitmap.last_target()
+
+        target = self.prepare_backup(bitmap, parent)
+        res = self.do_qmp_backup(device=bitmap.drive['id'],
+                                 sync='dirty-bitmap', bitmap=bitmap.name,
+                                 format=bitmap.drive['fmt'], target=target,
+                                 mode='existing')
+        if not res:
+            bitmap.del_target()
+            self.assertFalse(validate)
+        else:
+            self.make_reference_backup(bitmap)
+        return res
+
+
+    def check_backups(self):
+        for bitmap in self.bitmaps:
+            for incremental, reference in bitmap.backups:
+                self.assertTrue(iotests.compare_images(incremental, reference))
+            last = bitmap.last_target()[0]
+            self.assertTrue(iotests.compare_images(last, bitmap.drive['file']))
+
+
+    def hmp_io_writes(self, drive, patterns):
+        for pattern in patterns:
+            self.vm.hmp_qemu_io(drive, 'write -P%s %s %s' % pattern)
+        self.vm.hmp_qemu_io(drive, 'flush')
+
+
+    def do_incremental_simple(self, **kwargs):
+        self.create_anchor_backup()
+        self.add_bitmap('bitmap0', self.drives[0], **kwargs)
+
+        # Sanity: Create a "hollow" incremental backup
+        self.create_incremental()
+        # Three writes: One complete overwrite, one new segment,
+        # and one partial overlap.
+        self.hmp_io_writes(self.drives[0]['id'], (('0xab', 0, 512),
+                                                  ('0xfe', '16M', '256k'),
+                                                  ('0x64', '32736k', '64k')))
+        self.create_incremental()
+        # Three more writes, one of each kind, like above
+        self.hmp_io_writes(self.drives[0]['id'], (('0x9a', 0, 512),
+                                                  ('0x55', '8M', '352k'),
+                                                  ('0x78', '15872k', '1M')))
+        self.create_incremental()
+        self.vm.shutdown()
+        self.check_backups()
+
+
+    def test_incremental_simple(self):
+        '''
+        Test: Create and verify three incremental backups.
+
+        Create a bitmap and a full backup before VM execution begins,
+        then create a series of three incremental backups "during execution,"
+        i.e., after IO requests begin modifying the drive.
+        '''
+        return self.do_incremental_simple()
+
+
+    def test_small_granularity(self):
+        '''
+        Test: Create and verify backups made with a small granularity bitmap.
+
+        Perform the same test as test_incremental_simple, but with a granularity
+        of only 32KiB instead of the present default of 64KiB.
+        '''
+        return self.do_incremental_simple(granularity=32768)
+
+
+    def test_large_granularity(self):
+        '''
+        Test: Create and verify backups made with a large granularity bitmap.
+
+        Perform the same test as test_incremental_simple, but with a granularity
+        of 128KiB instead of the present default of 64KiB.
+        '''
+        return self.do_incremental_simple(granularity=131072)
+
+
+    def test_incremental_failure(self):
+        '''Test: Verify backups made after a failure are correct.
+
+        Simulate a failure during an incremental backup block job,
+        emulate additional writes, then create another incremental backup
+        afterwards and verify that the backup created is correct.
+        '''
+
+        # Create a blkdebug interface to this img as 'drive1',
+        # but don't actually create a new image.
+        drive1 = self.add_node('drive1', self.drives[0]['fmt'],
+                               path=self.drives[0]['file'],
+                               backup=self.drives[0]['backup'])
+        result = self.vm.qmp('blockdev-add', options={
+            'id': drive1['id'],
+            'driver': drive1['fmt'],
+            'file': {
+                'driver': 'blkdebug',
+                'image': {
+                    'driver': 'file',
+                    'filename': drive1['file']
+                },
+                'set-state': [{
+                    'event': 'flush_to_disk',
+                    'state': 1,
+                    'new_state': 2
+                }],
+                'inject-error': [{
+                    'event': 'read_aio',
+                    'errno': 5,
+                    'state': 2,
+                    'immediately': False,
+                    'once': True
+                }],
+            }
+        })
+        self.assert_qmp(result, 'return', {})
+
+        self.create_anchor_backup(self.drives[0])
+        self.add_bitmap('bitmap0', drive1)
+        # Note: at this point in a normal execution, the VM would resume
+        # and begin issuing IO requests.
+
+        self.hmp_io_writes(drive1['id'], (('0xab', 0, 512),
+                                          ('0xfe', '16M', '256k'),
+                                          ('0x64', '32736k', '64k')))
+
+        result = self.create_incremental(validate=False)
+        self.assertFalse(result)
+        self.hmp_io_writes(drive1['id'], (('0x9a', 0, 512),
+                                          ('0x55', '8M', '352k'),
+                                          ('0x78', '15872k', '1M')))
+        self.create_incremental()
+        self.vm.shutdown()
+        self.check_backups()
+
+
+    def test_sync_dirty_bitmap_missing(self):
+        self.assert_no_active_block_jobs()
+        self.files.append(self.err_img)
+        result = self.vm.qmp('drive-backup', device=self.drives[0]['id'],
+                             sync='dirty-bitmap', format=self.drives[0]['fmt'],
+                             target=self.err_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+
+    def test_sync_dirty_bitmap_not_found(self):
+        self.assert_no_active_block_jobs()
+        self.files.append(self.err_img)
+        result = self.vm.qmp('drive-backup', device=self.drives[0]['id'],
+                             sync='dirty-bitmap', bitmap='unknown',
+                             format=self.drives[0]['fmt'], target=self.err_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+
+    def test_sync_dirty_bitmap_bad_granularity(self):
+        '''
+        Test: Test what happens if we provide an improper granularity.
+
+        The granularity must always be a power of 2.
+        '''
+        self.assert_no_active_block_jobs()
+        self.assertRaises(AssertionError, self.add_bitmap,
+                          'bitmap0', self.drives[0],
+                          granularity=64000)
+
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for bitmap in self.bitmaps:
+            bitmap.cleanup()
+        for filename in self.files:
+            try_remove(filename)
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'])
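Note: stripped of the test scaffolding, the incremental backup flow exercised by 124 is a three-step QMP sequence: add a dirty bitmap, take a full anchor backup, then point each later backup at the bitmap with sync=dirty-bitmap. A condensed sketch, assuming `vm` is a running iotests.VM(); the device name and target paths are illustrative only:

vm.qmp('block-dirty-bitmap-add', node='drive0', name='bitmap0')
vm.qmp('drive-backup', device='drive0', sync='full',
       format='qcow2', target='/tmp/drive0.full.qcow2')
# ... guest writes dirty the bitmap here ...
vm.qmp('drive-backup', device='drive0', sync='dirty-bitmap',
       bitmap='bitmap0', format='qcow2', mode='existing',
       target='/tmp/drive0.inc0.qcow2')  # pre-created on top of the anchor
vm.event_wait(name='BLOCK_JOB_COMPLETED',
              match={'data': {'device': 'drive0'}})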

+ 5 - 0
tests/qemu-iotests/124.out

@@ -0,0 +1,5 @@
+.......
+----------------------------------------------------------------------
+Ran 7 tests
+
+OK

+ 86 - 0
tests/qemu-iotests/129

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Tests that "bdrv_drain_all" doesn't drain block jobs
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+import time
+
+class TestStopWithBlockJob(iotests.QMPTestCase):
+    test_img = os.path.join(iotests.test_dir, 'test.img')
+    target_img = os.path.join(iotests.test_dir, 'target.img')
+    base_img = os.path.join(iotests.test_dir, 'base.img')
+
+    def setUp(self):
+        iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, "1G")
+        iotests.qemu_img('create', '-f', iotests.imgfmt, self.test_img, "-b", self.base_img)
+        iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 1M 128M', self.test_img)
+        self.vm = iotests.VM().add_drive(self.test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        params = {"device": "drive0",
+                  "bps": 0,
+                  "bps_rd": 0,
+                  "bps_wr": 0,
+                  "iops": 0,
+                  "iops_rd": 0,
+                  "iops_wr": 0,
+                 }
+        result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
+                             **params)
+        self.vm.shutdown()
+
+    def do_test_stop(self, cmd, **args):
+        """Test 'stop' while block job is running on a throttled drive.
+        The 'stop' command shouldn't drain the job."""
+        params = {"device": "drive0",
+                  "bps": 1024,
+                  "bps_rd": 0,
+                  "bps_wr": 0,
+                  "iops": 0,
+                  "iops_rd": 0,
+                  "iops_wr": 0,
+                 }
+        result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
+                             **params)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp(cmd, **args)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp("stop")
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp("query-block-jobs")
+        self.assert_qmp(result, 'return[0]/busy', True)
+        self.assert_qmp(result, 'return[0]/ready', False)
+
+    def test_drive_mirror(self):
+        self.do_test_stop("drive-mirror", device="drive0",
+                          target=self.target_img,
+                          sync="full")
+
+    def test_drive_backup(self):
+        self.do_test_stop("drive-backup", device="drive0",
+                          target=self.target_img,
+                          sync="full")
+
+    def test_block_commit(self):
+        self.do_test_stop("block-commit", device="drive0")
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=["qcow2"])

+ 5 - 0
tests/qemu-iotests/129.out

@@ -0,0 +1,5 @@
+...
+----------------------------------------------------------------------
+Ran 3 tests
+
+OK

+ 3 - 0
tests/qemu-iotests/group

@@ -122,6 +122,9 @@
 115 rw auto
 116 rw auto quick
 121 rw auto
+122 rw auto
 123 rw auto quick
+124 rw auto backing
 128 rw auto quick
+129 rw auto quick
 130 rw auto quick

+ 38 - 0
tests/qemu-iotests/iotests.py

@@ -78,6 +78,23 @@ def create_image(name, size):
         i = i + 512
     file.close()
 
+# Test if 'match' is a recursive subset of 'event'
+def event_match(event, match=None):
+    if match is None:
+        return True
+
+    for key in match:
+        if key in event:
+            if isinstance(event[key], dict):
+                if not event_match(event[key], match[key]):
+                    return False
+            elif event[key] != match[key]:
+                return False
+        else:
+            return False
+
+    return True
+
 class VM(object):
     '''A QEMU VM'''
 
@@ -92,6 +109,7 @@ def __init__(self):
                      '-machine', 'accel=qtest',
                      '-display', 'none', '-vga', 'none']
         self._num_drives = 0
+        self._events = []
 
     # This can be used to add an unused monitor instance.
     def add_monitor_telnet(self, ip, port):
@@ -202,14 +220,34 @@ def qtest(self, cmd):
 
     def get_qmp_event(self, wait=False):
         '''Poll for one queued QMP event and return it'''
+        if len(self._events) > 0:
+            return self._events.pop(0)
         return self._qmp.pull_event(wait=wait)
 
     def get_qmp_events(self, wait=False):
         '''Poll for queued QMP events and return a list of dicts'''
         events = self._qmp.get_events(wait=wait)
+        events.extend(self._events)
+        del self._events[:]
         self._qmp.clear_events()
         return events
 
+    def event_wait(self, name='BLOCK_JOB_COMPLETED', timeout=60.0, match=None):
+        # Search cached events
+        for event in self._events:
+            if (event['event'] == name) and event_match(event, match):
+                self._events.remove(event)
+                return event
+
+        # Poll for new events
+        while True:
+            event = self._qmp.pull_event(wait=timeout)
+            if (event['event'] == name) and event_match(event, match):
+                return event
+            self._events.append(event)
+
+        return None
+
 index_re = re.compile(r'([^\[]+)\[([^\]]+)\]')
 
 class QMPTestCase(unittest.TestCase):

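Note: event_match() above treats `match` as a recursive subset pattern: every key the pattern names must be present and equal in the event, while keys it does not mention are ignored. event_wait() applies that pattern first to cached events, then to newly pulled ones. A small usage sketch, assuming `vm` is a running iotests.VM() and the device name is illustrative:

# Only the fields named in 'match' are pinned; 'len', 'offset', etc. may vary.
event = vm.event_wait(name='BLOCK_JOB_COMPLETED',
                      match={'data': {'device': 'drive0'}})
assert event is not None and event['data']['device'] == 'drive0'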
+ 11 - 8
tests/test-aio.c

@@ -107,6 +107,7 @@ static void test_notify(void)
 
 typedef struct {
     QemuMutex start_lock;
+    EventNotifier notifier;
     bool thread_acquired;
 } AcquireTestData;
 
@@ -118,6 +119,8 @@ static void *test_acquire_thread(void *opaque)
     qemu_mutex_lock(&data->start_lock);
     qemu_mutex_unlock(&data->start_lock);
 
+    g_usleep(500000);
+    event_notifier_set(&data->notifier);
     aio_context_acquire(ctx);
     aio_context_release(ctx);
 
@@ -126,20 +129,19 @@ static void *test_acquire_thread(void *opaque)
     return NULL;
 }
 
-static void dummy_notifier_read(EventNotifier *unused)
+static void dummy_notifier_read(EventNotifier *n)
 {
-    g_assert(false); /* should never be invoked */
+    event_notifier_test_and_clear(n);
 }
 
 static void test_acquire(void)
 {
     QemuThread thread;
-    EventNotifier notifier;
     AcquireTestData data;
 
     /* Dummy event notifier ensures aio_poll() will block */
-    event_notifier_init(&notifier, false);
-    aio_set_event_notifier(ctx, &notifier, dummy_notifier_read);
+    event_notifier_init(&data.notifier, false);
+    aio_set_event_notifier(ctx, &data.notifier, dummy_notifier_read);
     g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
 
     qemu_mutex_init(&data.start_lock);
@@ -153,12 +155,13 @@ static void test_acquire(void)
     /* Block in aio_poll(), let other thread kick us and acquire context */
     aio_context_acquire(ctx);
     qemu_mutex_unlock(&data.start_lock); /* let the thread run */
-    g_assert(!aio_poll(ctx, true));
+    g_assert(aio_poll(ctx, true));
+    g_assert(!data.thread_acquired);
     aio_context_release(ctx);
 
     qemu_thread_join(&thread);
-    aio_set_event_notifier(ctx, &notifier, NULL);
-    event_notifier_cleanup(&notifier);
+    aio_set_event_notifier(ctx, &data.notifier, NULL);
+    event_notifier_cleanup(&data.notifier);
 
     g_assert(data.thread_acquired);
 }

+ 255 - 0
tests/test-hbitmap.c

@@ -11,6 +11,8 @@
 
 #include <glib.h>
 #include <stdarg.h>
+#include <string.h>
+#include <sys/types.h>
 #include "qemu/hbitmap.h"
 
 #define LOG_BITS_PER_LONG          (BITS_PER_LONG == 32 ? 5 : 6)
@@ -23,6 +25,7 @@ typedef struct TestHBitmapData {
     HBitmap       *hb;
     unsigned long *bits;
     size_t         size;
+    size_t         old_size;
     int            granularity;
 } TestHBitmapData;
 
@@ -91,6 +94,44 @@ static void hbitmap_test_init(TestHBitmapData *data,
     }
 }
 
+static inline size_t hbitmap_test_array_size(size_t bits)
+{
+    size_t n = (bits + BITS_PER_LONG - 1) / BITS_PER_LONG;
+    return n ? n : 1;
+}
+
+static void hbitmap_test_truncate_impl(TestHBitmapData *data,
+                                       size_t size)
+{
+    size_t n;
+    size_t m;
+    data->old_size = data->size;
+    data->size = size;
+
+    if (data->size == data->old_size) {
+        return;
+    }
+
+    n = hbitmap_test_array_size(size);
+    m = hbitmap_test_array_size(data->old_size);
+    data->bits = g_realloc(data->bits, sizeof(unsigned long) * n);
+    if (n > m) {
+        memset(&data->bits[m], 0x00, sizeof(unsigned long) * (n - m));
+    }
+
+    /* If we shrink to an uneven multiple of sizeof(unsigned long),
+     * scrub the leftover memory. */
+    if (data->size < data->old_size) {
+        m = size % (sizeof(unsigned long) * 8);
+        if (m) {
+            unsigned long mask = (1ULL << m) - 1;
+            data->bits[n-1] &= mask;
+        }
+    }
+
+    hbitmap_truncate(data->hb, size);
+}
+
 static void hbitmap_test_teardown(TestHBitmapData *data,
 static void hbitmap_test_teardown(TestHBitmapData *data,
                                   const void *unused)
 {
     g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
     g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
 }
 
+{
+    size_t size = data->size;
+
+    /* First bit */
+    hbitmap_test_set(data, 0, 1);
+    if (diff < 0) {
+        /* Last bit in new, shortened map */
+        hbitmap_test_set(data, size + diff - 1, 1);
+
+        /* First bit to be truncated away */
+        hbitmap_test_set(data, size + diff, 1);
+    }
+    /* Last bit */
+    hbitmap_test_set(data, size - 1, 1);
+    if (data->granularity == 0) {
+        hbitmap_test_check_get(data);
+    }
+}
+
+static void hbitmap_test_check_boundary_bits(TestHBitmapData *data)
+{
+    size_t size = MIN(data->size, data->old_size);
+
+    if (data->granularity == 0) {
+        hbitmap_test_check_get(data);
+        hbitmap_test_check(data, 0);
+    } else {
+        /* If a granularity was set, note that every distinct
+         * (bit >> granularity) value that was set will increase
+         * the bit pop count by 2^granularity, not just 1.
+         *
+         * The hbitmap_test_check facility does not currently tolerate
+         * non-zero granularities, so test the boundaries and the population
+         * count manually.
+         */
+        g_assert(hbitmap_get(data->hb, 0));
+        g_assert(hbitmap_get(data->hb, size - 1));
+        g_assert_cmpint(2 << data->granularity, ==, hbitmap_count(data->hb));
+    }
+}
+
+/* Generic truncate test. */
+static void hbitmap_test_truncate(TestHBitmapData *data,
+                                  size_t size,
+                                  ssize_t diff,
+                                  int granularity)
+{
+    hbitmap_test_init(data, size, granularity);
+    hbitmap_test_set_boundary_bits(data, diff);
+    hbitmap_test_truncate_impl(data, size + diff);
+    hbitmap_test_check_boundary_bits(data);
+}
+
+static void test_hbitmap_truncate_nop(TestHBitmapData *data,
+                                      const void *unused)
+{
+    hbitmap_test_truncate(data, L2, 0, 0);
+}
+
+/**
+ * Grow by an amount smaller than the granularity, without crossing
+ * a granularity alignment boundary. Effectively a NOP.
+ */
+static void test_hbitmap_truncate_grow_negligible(TestHBitmapData *data,
+                                                  const void *unused)
+{
+    size_t size = L2 - 1;
+    size_t diff = 1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Shrink by an amount smaller than the granularity, without crossing
+ * a granularity alignment boundary. Effectively a NOP.
+ */
+static void test_hbitmap_truncate_shrink_negligible(TestHBitmapData *data,
+                                                    const void *unused)
+{
+    size_t size = L2;
+    ssize_t diff = -1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Grow by an amount smaller than the granularity, but crossing over
+ * a granularity alignment boundary.
+ */
+static void test_hbitmap_truncate_grow_tiny(TestHBitmapData *data,
+                                            const void *unused)
+{
+    size_t size = L2 - 2;
+    ssize_t diff = 1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Shrink by an amount smaller than the granularity, but crossing over
+ * a granularity alignment boundary.
+ */
+static void test_hbitmap_truncate_shrink_tiny(TestHBitmapData *data,
+                                              const void *unused)
+{
+    size_t size = L2 - 1;
+    ssize_t diff = -1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Grow by an amount smaller than sizeof(long), and not crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_grow_small(TestHBitmapData *data,
+                                             const void *unused)
+{
+    size_t size = L2 + 1;
+    size_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, diff, 0);
+}
+
+/**
+ * Shrink by an amount smaller than sizeof(long), and not crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_shrink_small(TestHBitmapData *data,
+                                               const void *unused)
+{
+    size_t size = L2;
+    size_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, -diff, 0);
+}
+
+/**
+ * Grow by an amount smaller than sizeof(long), while crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_grow_medium(TestHBitmapData *data,
+                                              const void *unused)
+{
+    size_t size = L2 - 1;
+    size_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, diff, 0);
+}
+
+/**
+ * Shrink by an amount smaller than sizeof(long), while crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_shrink_medium(TestHBitmapData *data,
+                                                const void *unused)
+{
+    size_t size = L2 + 1;
+    size_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, -diff, 0);
+}
+
+/**
+ * Grow by an amount larger than sizeof(long).
+ */
+static void test_hbitmap_truncate_grow_large(TestHBitmapData *data,
+                                             const void *unused)
+{
+    size_t size = L2;
+    size_t diff = 8 * sizeof(long);
+
+    hbitmap_test_truncate(data, size, diff, 0);
+}
+
+/**
+ * Shrink by an amount larger than sizeof(long).
+ */
+static void test_hbitmap_truncate_shrink_large(TestHBitmapData *data,
+                                               const void *unused)
+{
+    size_t size = L2;
+    size_t diff = 8 * sizeof(long);
+
+    hbitmap_test_truncate(data, size, -diff, 0);
+}
+
 static void hbitmap_test_add(const char *testpath,
                                    void (*test_func)(TestHBitmapData *data, const void *user_data))
 {
@@ -395,6 +628,28 @@ int main(int argc, char **argv)
     hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty);
     hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty);
     hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset);
     hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset);
     hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity);
     hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity);
+
+    hbitmap_test_add("/hbitmap/truncate/nop", test_hbitmap_truncate_nop);
+    hbitmap_test_add("/hbitmap/truncate/grow/negligible",
+                     test_hbitmap_truncate_grow_negligible);
+    hbitmap_test_add("/hbitmap/truncate/shrink/negligible",
+                     test_hbitmap_truncate_shrink_negligible);
+    hbitmap_test_add("/hbitmap/truncate/grow/tiny",
+                     test_hbitmap_truncate_grow_tiny);
+    hbitmap_test_add("/hbitmap/truncate/shrink/tiny",
+                     test_hbitmap_truncate_shrink_tiny);
+    hbitmap_test_add("/hbitmap/truncate/grow/small",
+                     test_hbitmap_truncate_grow_small);
+    hbitmap_test_add("/hbitmap/truncate/shrink/small",
+                     test_hbitmap_truncate_shrink_small);
+    hbitmap_test_add("/hbitmap/truncate/grow/medium",
+                     test_hbitmap_truncate_grow_medium);
+    hbitmap_test_add("/hbitmap/truncate/shrink/medium",
+                     test_hbitmap_truncate_shrink_medium);
+    hbitmap_test_add("/hbitmap/truncate/grow/large",
+                     test_hbitmap_truncate_grow_large);
+    hbitmap_test_add("/hbitmap/truncate/shrink/large",
+                     test_hbitmap_truncate_shrink_large);
     g_test_run();
 
     return 0;

+ 6 - 8
thread-pool.c

@@ -170,12 +170,12 @@ restart:
         if (elem->state != THREAD_DONE) {
             continue;
         }
-        if (elem->state == THREAD_DONE) {
-            trace_thread_pool_complete(pool, elem, elem->common.opaque,
-                                       elem->ret);
-        }
-        if (elem->state == THREAD_DONE && elem->common.cb) {
-            QLIST_REMOVE(elem, all);
+
+        trace_thread_pool_complete(pool, elem, elem->common.opaque,
+                                   elem->ret);
+        QLIST_REMOVE(elem, all);
+
+        if (elem->common.cb) {
             /* Read state before ret.  */
             smp_rmb();
 
@@ -188,8 +188,6 @@ restart:
             qemu_aio_unref(elem);
             goto restart;
         } else {
-            /* remove the request */
-            QLIST_REMOVE(elem, all);
             qemu_aio_unref(elem);
         }
     }

+ 85 - 0
util/hbitmap.c

@@ -90,6 +90,9 @@ struct HBitmap {
      * bitmap will still allocate HBITMAP_LEVELS arrays.
      */
     unsigned long *levels[HBITMAP_LEVELS];
+
+    /* The length of each levels[] array. */
+    uint64_t sizes[HBITMAP_LEVELS];
 };
 
 /* Advance hbi to the next nonzero word and return it.  hbi->pos
@@ -384,6 +387,7 @@ HBitmap *hbitmap_alloc(uint64_t size, int granularity)
     hb->granularity = granularity;
     for (i = HBITMAP_LEVELS; i-- > 0; ) {
         size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        hb->sizes[i] = size;
         hb->levels[i] = g_new0(unsigned long, size);
     }
 
@@ -395,3 +399,84 @@ HBitmap *hbitmap_alloc(uint64_t size, int granularity)
     hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
     return hb;
 }
+
+void hbitmap_truncate(HBitmap *hb, uint64_t size)
+{
+    bool shrink;
+    unsigned i;
+    uint64_t num_elements = size;
+    uint64_t old;
+
+    /* Size comes in as logical elements, adjust for granularity. */
+    size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+    shrink = size < hb->size;
+
+    /* bit sizes are identical; nothing to do. */
+    if (size == hb->size) {
+        return;
+    }
+
+    /* If we're losing bits, let's clear those bits before we invalidate all of
+     * our invariants. This helps keep the bitcount consistent, and will prevent
+     * us from carrying around garbage bits beyond the end of the map.
+     */
+    if (shrink) {
+        /* Don't clear partial granularity groups;
+         * start at the first full one. */
+        uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity);
+        uint64_t fix_count = (hb->size << hb->granularity) - start;
+
+        assert(fix_count);
+        hbitmap_reset(hb, start, fix_count);
+    }
+
+    hb->size = size;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        size = MAX(BITS_TO_LONGS(size), 1);
+        if (hb->sizes[i] == size) {
+            break;
+        }
+        old = hb->sizes[i];
+        hb->sizes[i] = size;
+        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+        if (!shrink) {
+            memset(&hb->levels[i][old], 0x00,
+                   (size - old) * sizeof(*hb->levels[i]));
+        }
+    }
+}
+
+
+/**
+ * Given HBitmaps A and B, let A := A (BITOR) B.
+ * Bitmap B will not be modified.
+ *
+ * @return true if the merge was successful,
+ *         false if it was not attempted.
+ */
+bool hbitmap_merge(HBitmap *a, const HBitmap *b)
+{
+    int i;
+    uint64_t j;
+
+    if ((a->size != b->size) || (a->granularity != b->granularity)) {
+        return false;
+    }
+
+    if (hbitmap_count(b) == 0) {
+        return true;
+    }
+
+    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
+     * It may be possible to improve running times for sparsely populated maps
+     * by using hbitmap_iter_next, but this is suboptimal for dense maps.
+     */
+    for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
+        for (j = 0; j < a->sizes[i]; j++) {
+            a->levels[i][j] |= b->levels[i][j];
+        }
+    }
+
+    return true;
+}
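Note: hbitmap_truncate() above takes the new size in logical elements, rounds it up by the granularity before recomputing per-level array sizes, and on shrink first resets every bit from the first full granularity group past the new end so no garbage bits survive the realloc. A rough Python model of the size bookkeeping only (not the QEMU implementation; BITS_PER_LONG == 64 is an assumption for a 64-bit host):

BITS_PER_LONG = 64   # assumed host word size
BITS_PER_LEVEL = 6   # log2(BITS_PER_LONG)

def level_word_counts(num_elements, granularity):
    """Words needed per level for a bitmap truncated to num_elements."""
    # Logical elements -> bits, rounded up by granularity, as in the C code.
    size = (num_elements + (1 << granularity) - 1) >> granularity
    sizes = []
    while True:
        # Each level keeps one summary bit per word of the level below.
        size = max((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1)
        sizes.append(size)
        if size == 1:
            break
    return sizes[::-1]  # topmost level first, like hb->levels[]

Comparing the lists for the old and new sizes shows which levels hbitmap_truncate() actually reallocates; a level whose word count is unchanged is left alone, which is why the loop in the C code stops early.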

Too many files were changed in this diff, so some files are not shown.