Pārlūkot izejas kodu

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Thu 16 Jun 2016 15:01:27 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (39 commits)
  hbitmap: add 'pos < size' asserts
  iotests: Add test for oVirt-like storage migration
  iotests: Add test for post-mirror backing chains
  block/null: Implement bdrv_refresh_filename()
  block/mirror: Fix target backing BDS
  block: Allow replacement of a BDS by its overlay
  rbd:change error_setg() to error_setg_errno()
  iotests: 095: Clean up QEMU before showing image info
  block: Create the commit block job before reopening any image
  block: Prevent sleeping jobs from resuming if they have been paused
  block: use the block job list in qmp_query_block_jobs()
  block: use the block job list in bdrv_drain_all()
  block: Fix snapshot=on with aio=native
  block: Remove bs->zero_beyond_eof
  qcow2: Let vmstate call qcow2_co_preadv/pwrite directly
  block: Make bdrv_load/save_vmstate coroutine_fns
  block: Allow .bdrv_load/save_vmstate() to return 0/-errno
  block: Make .bdrv_load_vmstate() vectored
  block: Introduce bdrv_preadv()
  doc: Fix mailing list address in tests/qemu-iotests/README
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 9 gadi atpakaļ
vecāks
revīzija
dc278c58fa

+ 20 - 12
block.c

@@ -684,6 +684,10 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
     /* For temporary files, unconditional cache=unsafe is fine */
     /* For temporary files, unconditional cache=unsafe is fine */
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
+
+    /* aio=native doesn't work for cache.direct=off, so disable it for the
+     * temporary snapshot */
+    *child_flags &= ~BDRV_O_NATIVE_AIO;
 }
 }
 
 
 /*
 /*
@@ -937,8 +941,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
         goto fail_opts;
         goto fail_opts;
     }
     }
 
 
-    bs->request_alignment = 512;
-    bs->zero_beyond_eof = true;
+    bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
     bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
 
 
     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
@@ -2192,7 +2195,6 @@ static void bdrv_close(BlockDriverState *bs)
         bs->encrypted = 0;
         bs->encrypted = 0;
         bs->valid_key = 0;
         bs->valid_key = 0;
         bs->sg = 0;
         bs->sg = 0;
-        bs->zero_beyond_eof = false;
         QDECREF(bs->options);
         QDECREF(bs->options);
         QDECREF(bs->explicit_options);
         QDECREF(bs->explicit_options);
         bs->options = NULL;
         bs->options = NULL;
@@ -2224,9 +2226,23 @@ void bdrv_close_all(void)
 static void change_parent_backing_link(BlockDriverState *from,
 static void change_parent_backing_link(BlockDriverState *from,
                                        BlockDriverState *to)
                                        BlockDriverState *to)
 {
 {
-    BdrvChild *c, *next;
+    BdrvChild *c, *next, *to_c;
 
 
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+        if (c->role == &child_backing) {
+            /* @from is generally not allowed to be a backing file, except for
+             * when @to is the overlay. In that case, @from may not be replaced
+             * by @to as @to's backing node. */
+            QLIST_FOREACH(to_c, &to->children, next) {
+                if (to_c == c) {
+                    break;
+                }
+            }
+            if (to_c) {
+                continue;
+            }
+        }
+
         assert(c->role != &child_backing);
         assert(c->role != &child_backing);
         bdrv_ref(to);
         bdrv_ref(to);
         bdrv_replace_child(c, to);
         bdrv_replace_child(c, to);
@@ -2275,14 +2291,6 @@ void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
 
 
     change_parent_backing_link(old, new);
     change_parent_backing_link(old, new);
 
 
-    /* Change backing files if a previously independent node is added to the
-     * chain. For active commit, we replace top by its own (indirect) backing
-     * file and don't do anything here so we don't build a loop. */
-    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
-        bdrv_set_backing_hd(new, backing_bs(old));
-        bdrv_set_backing_hd(old, NULL);
-    }
-
     bdrv_unref(old);
     bdrv_unref(old);
 }
 }
 
 

+ 6 - 5
block/commit.c

@@ -236,6 +236,11 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
         return;
         return;
     }
     }
 
 
+    s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
     orig_base_flags    = bdrv_get_flags(base);
     orig_base_flags    = bdrv_get_flags(base);
     orig_overlay_flags = bdrv_get_flags(overlay_bs);
     orig_overlay_flags = bdrv_get_flags(overlay_bs);
 
 
@@ -252,16 +257,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
         bdrv_reopen_multiple(reopen_queue, &local_err);
         bdrv_reopen_multiple(reopen_queue, &local_err);
         if (local_err != NULL) {
         if (local_err != NULL) {
             error_propagate(errp, local_err);
             error_propagate(errp, local_err);
+            block_job_unref(&s->common);
             return;
             return;
         }
         }
     }
     }
 
 
 
 
-    s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
-    if (!s) {
-        return;
-    }
-
     s->base = blk_new();
     s->base = blk_new();
     blk_insert_bs(s->base, base);
     blk_insert_bs(s->base, base);
 
 

+ 196 - 110
block/io.c

@@ -289,15 +289,21 @@ void bdrv_drain_all(void)
     bool busy = true;
     bool busy = true;
     BlockDriverState *bs;
     BlockDriverState *bs;
     BdrvNextIterator it;
     BdrvNextIterator it;
+    BlockJob *job = NULL;
     GSList *aio_ctxs = NULL, *ctx;
     GSList *aio_ctxs = NULL, *ctx;
 
 
+    while ((job = block_job_next(job))) {
+        AioContext *aio_context = blk_get_aio_context(job->blk);
+
+        aio_context_acquire(aio_context);
+        block_job_pause(job);
+        aio_context_release(aio_context);
+    }
+
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
 
         aio_context_acquire(aio_context);
         aio_context_acquire(aio_context);
-        if (bs->job) {
-            block_job_pause(bs->job);
-        }
         bdrv_parent_drained_begin(bs);
         bdrv_parent_drained_begin(bs);
         bdrv_io_unplugged_begin(bs);
         bdrv_io_unplugged_begin(bs);
         bdrv_drain_recurse(bs);
         bdrv_drain_recurse(bs);
@@ -340,12 +346,18 @@ void bdrv_drain_all(void)
         aio_context_acquire(aio_context);
         aio_context_acquire(aio_context);
         bdrv_io_unplugged_end(bs);
         bdrv_io_unplugged_end(bs);
         bdrv_parent_drained_end(bs);
         bdrv_parent_drained_end(bs);
-        if (bs->job) {
-            block_job_resume(bs->job);
-        }
         aio_context_release(aio_context);
         aio_context_release(aio_context);
     }
     }
     g_slist_free(aio_ctxs);
     g_slist_free(aio_ctxs);
+
+    job = NULL;
+    while ((job = block_job_next(job))) {
+        AioContext *aio_context = blk_get_aio_context(job->blk);
+
+        aio_context_acquire(aio_context);
+        block_job_resume(job);
+        aio_context_release(aio_context);
+    }
 }
 }
 
 
 /**
 /**
@@ -404,12 +416,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
 }
 }
 
 
 /**
 /**
- * Round a region to cluster boundaries
+ * Round a region to cluster boundaries (sector-based)
  */
  */
-void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t sector_num, int nb_sectors,
-                            int64_t *cluster_sector_num,
-                            int *cluster_nb_sectors)
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+                                    int64_t sector_num, int nb_sectors,
+                                    int64_t *cluster_sector_num,
+                                    int *cluster_nb_sectors)
 {
 {
     BlockDriverInfo bdi;
     BlockDriverInfo bdi;
 
 
@@ -424,6 +436,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
     }
     }
 }
 }
 
 
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t offset, unsigned int bytes,
+                            int64_t *cluster_offset,
+                            unsigned int *cluster_bytes)
+{
+    BlockDriverInfo bdi;
+
+    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+        *cluster_offset = offset;
+        *cluster_bytes = bytes;
+    } else {
+        int64_t c = bdi.cluster_size;
+        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+    }
+}
+
 static int bdrv_get_cluster_size(BlockDriverState *bs)
 static int bdrv_get_cluster_size(BlockDriverState *bs)
 {
 {
     BlockDriverInfo bdi;
     BlockDriverInfo bdi;
@@ -680,6 +712,18 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
     }
     }
 }
 }
 
 
+int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+    int ret;
+
+    ret = bdrv_prwv_co(bs, offset, qiov, false, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return qiov->size;
+}
+
 int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
 int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
 {
 {
     QEMUIOVector qiov;
     QEMUIOVector qiov;
@@ -687,19 +731,13 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
         .iov_base = (void *)buf,
         .iov_base = (void *)buf,
         .iov_len = bytes,
         .iov_len = bytes,
     };
     };
-    int ret;
 
 
     if (bytes < 0) {
     if (bytes < 0) {
         return -EINVAL;
         return -EINVAL;
     }
     }
 
 
     qemu_iovec_init_external(&qiov, &iov, 1);
     qemu_iovec_init_external(&qiov, &iov, 1);
-    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bytes;
+    return bdrv_preadv(bs, offset, &qiov);
 }
 }
 
 
 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
@@ -776,6 +814,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
     int64_t sector_num;
     int64_t sector_num;
     unsigned int nb_sectors;
     unsigned int nb_sectors;
 
 
+    assert(!(flags & ~BDRV_REQ_MASK));
+
     if (drv->bdrv_co_preadv) {
     if (drv->bdrv_co_preadv) {
         return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
         return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
     }
     }
@@ -815,6 +855,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
     unsigned int nb_sectors;
     unsigned int nb_sectors;
     int ret;
     int ret;
 
 
+    assert(!(flags & ~BDRV_REQ_MASK));
+
     if (drv->bdrv_co_pwritev) {
     if (drv->bdrv_co_pwritev) {
         ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
         ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
                                    flags & bs->supported_write_flags);
                                    flags & bs->supported_write_flags);
@@ -861,7 +903,7 @@ emulate_flags:
 }
 }
 
 
 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+        int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
 {
 {
     /* Perform I/O through a temporary buffer so that users who scribble over
     /* Perform I/O through a temporary buffer so that users who scribble over
      * their read buffer while the operation is in progress do not end up
      * their read buffer while the operation is in progress do not end up
@@ -873,21 +915,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
     BlockDriver *drv = bs->drv;
     BlockDriver *drv = bs->drv;
     struct iovec iov;
     struct iovec iov;
     QEMUIOVector bounce_qiov;
     QEMUIOVector bounce_qiov;
-    int64_t cluster_sector_num;
-    int cluster_nb_sectors;
+    int64_t cluster_offset;
+    unsigned int cluster_bytes;
     size_t skip_bytes;
     size_t skip_bytes;
     int ret;
     int ret;
 
 
     /* Cover entire cluster so no additional backing file I/O is required when
     /* Cover entire cluster so no additional backing file I/O is required when
      * allocating cluster in the image file.
      * allocating cluster in the image file.
      */
      */
-    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
-                           &cluster_sector_num, &cluster_nb_sectors);
+    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
 
 
-    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
-                                   cluster_sector_num, cluster_nb_sectors);
+    trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+                                   cluster_offset, cluster_bytes);
 
 
-    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+    iov.iov_len = cluster_bytes;
     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
     if (bounce_buffer == NULL) {
     if (bounce_buffer == NULL) {
         ret = -ENOMEM;
         ret = -ENOMEM;
@@ -896,8 +937,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 
 
     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
 
 
-    ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
-                             cluster_nb_sectors * BDRV_SECTOR_SIZE,
+    ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
                              &bounce_qiov, 0);
                              &bounce_qiov, 0);
     if (ret < 0) {
     if (ret < 0) {
         goto err;
         goto err;
@@ -905,16 +945,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 
 
     if (drv->bdrv_co_pwrite_zeroes &&
     if (drv->bdrv_co_pwrite_zeroes &&
         buffer_is_zero(bounce_buffer, iov.iov_len)) {
         buffer_is_zero(bounce_buffer, iov.iov_len)) {
-        ret = bdrv_co_do_pwrite_zeroes(bs,
-                                       cluster_sector_num * BDRV_SECTOR_SIZE,
-                                       cluster_nb_sectors * BDRV_SECTOR_SIZE,
-                                       0);
+        ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
     } else {
     } else {
         /* This does not change the data on the disk, it is not necessary
         /* This does not change the data on the disk, it is not necessary
          * to flush even in cache=writethrough mode.
          * to flush even in cache=writethrough mode.
          */
          */
-        ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
-                                  cluster_nb_sectors * BDRV_SECTOR_SIZE,
+        ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
                                   &bounce_qiov, 0);
                                   &bounce_qiov, 0);
     }
     }
 
 
@@ -926,9 +962,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
         goto err;
         goto err;
     }
     }
 
 
-    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
-    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
-                        nb_sectors * BDRV_SECTOR_SIZE);
+    skip_bytes = offset - cluster_offset;
+    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
 
 
 err:
 err:
     qemu_vfree(bounce_buffer);
     qemu_vfree(bounce_buffer);
@@ -944,15 +979,15 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
     int64_t align, QEMUIOVector *qiov, int flags)
     int64_t align, QEMUIOVector *qiov, int flags)
 {
 {
+    int64_t total_bytes, max_bytes;
     int ret;
     int ret;
 
 
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(is_power_of_2(align));
+    assert((offset & (align - 1)) == 0);
+    assert((bytes & (align - 1)) == 0);
     assert(!qiov || bytes == qiov->size);
     assert(!qiov || bytes == qiov->size);
     assert((bs->open_flags & BDRV_O_NO_IO) == 0);
     assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+    assert(!(flags & ~BDRV_REQ_MASK));
 
 
     /* Handle Copy on Read and associated serialisation */
     /* Handle Copy on Read and associated serialisation */
     if (flags & BDRV_REQ_COPY_ON_READ) {
     if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -969,59 +1004,50 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
     }
     }
 
 
     if (flags & BDRV_REQ_COPY_ON_READ) {
     if (flags & BDRV_REQ_COPY_ON_READ) {
+        int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+        int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+        unsigned int nb_sectors = end_sector - start_sector;
         int pnum;
         int pnum;
 
 
-        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+        ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
         if (ret < 0) {
         if (ret < 0) {
             goto out;
             goto out;
         }
         }
 
 
         if (!ret || pnum != nb_sectors) {
         if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+            ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
             goto out;
             goto out;
         }
         }
     }
     }
 
 
     /* Forward the request to the BlockDriver */
     /* Forward the request to the BlockDriver */
-    if (!bs->zero_beyond_eof) {
-        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
-    } else {
-        /* Read zeros after EOF */
-        int64_t total_sectors, max_nb_sectors;
-
-        total_sectors = bdrv_nb_sectors(bs);
-        if (total_sectors < 0) {
-            ret = total_sectors;
-            goto out;
-        }
+    total_bytes = bdrv_getlength(bs);
+    if (total_bytes < 0) {
+        ret = total_bytes;
+        goto out;
+    }
 
 
-        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
-                                  align >> BDRV_SECTOR_BITS);
-        if (nb_sectors < max_nb_sectors) {
-            ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
-        } else if (max_nb_sectors > 0) {
-            QEMUIOVector local_qiov;
+    max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+    if (bytes < max_bytes) {
+        ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
+    } else if (max_bytes > 0) {
+        QEMUIOVector local_qiov;
 
 
-            qemu_iovec_init(&local_qiov, qiov->niov);
-            qemu_iovec_concat(&local_qiov, qiov, 0,
-                              max_nb_sectors * BDRV_SECTOR_SIZE);
+        qemu_iovec_init(&local_qiov, qiov->niov);
+        qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes);
 
 
-            ret = bdrv_driver_preadv(bs, offset,
-                                     max_nb_sectors * BDRV_SECTOR_SIZE,
-                                     &local_qiov, 0);
+        ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0);
 
 
-            qemu_iovec_destroy(&local_qiov);
-        } else {
-            ret = 0;
-        }
+        qemu_iovec_destroy(&local_qiov);
+    } else {
+        ret = 0;
+    }
 
 
-        /* Reading beyond end of file is supposed to produce zeroes */
-        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
-            uint64_t offset = MAX(0, total_sectors - sector_num);
-            uint64_t bytes = (sector_num + nb_sectors - offset) *
-                              BDRV_SECTOR_SIZE;
-            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
-        }
+    /* Reading beyond end of file is supposed to produce zeroes */
+    if (ret == 0 && total_bytes < offset + bytes) {
+        uint64_t zero_offset = MAX(0, total_bytes - offset);
+        uint64_t zero_bytes = offset + bytes - zero_offset;
+        qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes);
     }
     }
 
 
 out:
 out:
@@ -1038,8 +1064,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
     BlockDriver *drv = bs->drv;
     BlockDriver *drv = bs->drv;
     BdrvTrackedRequest req;
     BdrvTrackedRequest req;
 
 
-    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
-    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    uint64_t align = bs->request_alignment;
     uint8_t *head_buf = NULL;
     uint8_t *head_buf = NULL;
     uint8_t *tail_buf = NULL;
     uint8_t *tail_buf = NULL;
     QEMUIOVector local_qiov;
     QEMUIOVector local_qiov;
@@ -1235,13 +1260,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     bool waited;
     bool waited;
     int ret;
     int ret;
 
 
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+    int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
 
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
     assert(!qiov || bytes == qiov->size);
     assert(!qiov || bytes == qiov->size);
     assert((bs->open_flags & BDRV_O_NO_IO) == 0);
     assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+    assert(!(flags & ~BDRV_REQ_MASK));
 
 
     waited = wait_serialising_requests(req);
     waited = wait_serialising_requests(req);
     assert(!waited || !req->serialising);
     assert(!waited || !req->serialising);
@@ -1263,22 +1287,21 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
         /* Do nothing, write notifier decided to fail this request */
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
         bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
         bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
-        ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
-                                       nb_sectors << BDRV_SECTOR_BITS, flags);
+        ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
     } else {
     } else {
         bdrv_debug_event(bs, BLKDBG_PWRITEV);
         bdrv_debug_event(bs, BLKDBG_PWRITEV);
         ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
         ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
     }
     }
     bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
     bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
 
-    bdrv_set_dirty(bs, sector_num, nb_sectors);
+    bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
 
 
     if (bs->wr_highest_offset < offset + bytes) {
     if (bs->wr_highest_offset < offset + bytes) {
         bs->wr_highest_offset = offset + bytes;
         bs->wr_highest_offset = offset + bytes;
     }
     }
 
 
     if (ret >= 0) {
     if (ret >= 0) {
-        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+        bs->total_sectors = MAX(bs->total_sectors, end_sector);
     }
     }
 
 
     return ret;
     return ret;
@@ -1293,7 +1316,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
     uint8_t *buf = NULL;
     uint8_t *buf = NULL;
     QEMUIOVector local_qiov;
     QEMUIOVector local_qiov;
     struct iovec iov;
     struct iovec iov;
-    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    uint64_t align = bs->request_alignment;
     unsigned int head_padding_bytes, tail_padding_bytes;
     unsigned int head_padding_bytes, tail_padding_bytes;
     int ret = 0;
     int ret = 0;
 
 
@@ -1380,8 +1403,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
     BdrvRequestFlags flags)
     BdrvRequestFlags flags)
 {
 {
     BdrvTrackedRequest req;
     BdrvTrackedRequest req;
-    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
-    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    uint64_t align = bs->request_alignment;
     uint8_t *head_buf = NULL;
     uint8_t *head_buf = NULL;
     uint8_t *tail_buf = NULL;
     uint8_t *tail_buf = NULL;
     QEMUIOVector local_qiov;
     QEMUIOVector local_qiov;
@@ -1824,6 +1846,62 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
 }
 }
 
 
+typedef struct BdrvVmstateCo {
+    BlockDriverState    *bs;
+    QEMUIOVector        *qiov;
+    int64_t             pos;
+    bool                is_read;
+    int                 ret;
+} BdrvVmstateCo;
+
+static int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+                   bool is_read)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    } else if (drv->bdrv_load_vmstate) {
+        return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
+                       : drv->bdrv_save_vmstate(bs, qiov, pos);
+    } else if (bs->file) {
+        return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+    }
+
+    return -ENOTSUP;
+}
+
+static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
+{
+    BdrvVmstateCo *co = opaque;
+    co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
+}
+
+static inline int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+                bool is_read)
+{
+    if (qemu_in_coroutine()) {
+        return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+    } else {
+        BdrvVmstateCo data = {
+            .bs         = bs,
+            .qiov       = qiov,
+            .pos        = pos,
+            .is_read    = is_read,
+            .ret        = -EINPROGRESS,
+        };
+        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry);
+
+        qemu_coroutine_enter(co, &data);
+        while (data.ret == -EINPROGRESS) {
+            aio_poll(bdrv_get_aio_context(bs), true);
+        }
+        return data.ret;
+    }
+}
+
 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                       int64_t pos, int size)
                       int64_t pos, int size)
 {
 {
@@ -1832,37 +1910,45 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
         .iov_base   = (void *) buf,
         .iov_base   = (void *) buf,
         .iov_len    = size,
         .iov_len    = size,
     };
     };
+    int ret;
 
 
     qemu_iovec_init_external(&qiov, &iov, 1);
     qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_writev_vmstate(bs, &qiov, pos);
+
+    ret = bdrv_writev_vmstate(bs, &qiov, pos);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return size;
 }
 }
 
 
 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
 {
-    BlockDriver *drv = bs->drv;
+    return bdrv_rw_vmstate(bs, qiov, pos, false);
+}
 
 
-    if (!drv) {
-        return -ENOMEDIUM;
-    } else if (drv->bdrv_save_vmstate) {
-        return drv->bdrv_save_vmstate(bs, qiov, pos);
-    } else if (bs->file) {
-        return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                      int64_t pos, int size)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base   = buf,
+        .iov_len    = size,
+    };
+    int ret;
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    ret = bdrv_readv_vmstate(bs, &qiov, pos);
+    if (ret < 0) {
+        return ret;
     }
     }
 
 
-    return -ENOTSUP;
+    return size;
 }
 }
 
 
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                      int64_t pos, int size)
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
 {
-    BlockDriver *drv = bs->drv;
-    if (!drv)
-        return -ENOMEDIUM;
-    if (drv->bdrv_load_vmstate)
-        return drv->bdrv_load_vmstate(bs, buf, pos, size);
-    if (bs->file)
-        return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
-    return -ENOTSUP;
+    return bdrv_rw_vmstate(bs, qiov, pos, true);
 }
 }
 
 
 /**************************************************************/
 /**************************************************************/

+ 65 - 23
block/linux-aio.c

@@ -11,8 +11,10 @@
 #include "qemu-common.h"
 #include "qemu-common.h"
 #include "block/aio.h"
 #include "block/aio.h"
 #include "qemu/queue.h"
 #include "qemu/queue.h"
+#include "block/block.h"
 #include "block/raw-aio.h"
 #include "block/raw-aio.h"
 #include "qemu/event_notifier.h"
 #include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
 
 
 #include <libaio.h>
 #include <libaio.h>
 
 
@@ -30,6 +32,7 @@
 
 
 struct qemu_laiocb {
 struct qemu_laiocb {
     BlockAIOCB common;
     BlockAIOCB common;
+    Coroutine *co;
     LinuxAioState *ctx;
     LinuxAioState *ctx;
     struct iocb iocb;
     struct iocb iocb;
     ssize_t ret;
     ssize_t ret;
@@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
             }
             }
         }
         }
     }
     }
-    laiocb->common.cb(laiocb->common.opaque, ret);
 
 
-    qemu_aio_unref(laiocb);
+    laiocb->ret = ret;
+    if (laiocb->co) {
+        qemu_coroutine_enter(laiocb->co, NULL);
+    } else {
+        laiocb->common.cb(laiocb->common.opaque, ret);
+        qemu_aio_unref(laiocb);
+    }
 }
 }
 
 
 /* The completion BH fetches completed I/O requests and invokes their
 /* The completion BH fetches completed I/O requests and invokes their
@@ -141,6 +149,8 @@ static void qemu_laio_completion_bh(void *opaque)
     if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
     if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
         ioq_submit(s);
     }
     }
+
+    qemu_bh_cancel(s->completion_bh);
 }
 }
 
 
 static void qemu_laio_completion_cb(EventNotifier *e)
 static void qemu_laio_completion_cb(EventNotifier *e)
@@ -148,7 +158,7 @@ static void qemu_laio_completion_cb(EventNotifier *e)
     LinuxAioState *s = container_of(e, LinuxAioState, e);
     LinuxAioState *s = container_of(e, LinuxAioState, e);
 
 
     if (event_notifier_test_and_clear(&s->e)) {
     if (event_notifier_test_and_clear(&s->e)) {
-        qemu_bh_schedule(s->completion_bh);
+        qemu_laio_completion_bh(s);
     }
     }
 }
 }
 
 
@@ -230,22 +240,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
     }
     }
 }
 }
 
 
-BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+                          int type)
 {
 {
-    struct qemu_laiocb *laiocb;
-    struct iocb *iocbs;
-    off_t offset = sector_num * 512;
-
-    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * 512;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    iocbs = &laiocb->iocb;
+    LinuxAioState *s = laiocb->ctx;
+    struct iocb *iocbs = &laiocb->iocb;
+    QEMUIOVector *qiov = laiocb->qiov;
 
 
     switch (type) {
     switch (type) {
     case QEMU_AIO_WRITE:
     case QEMU_AIO_WRITE:
@@ -258,7 +258,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
     default:
     default:
         fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
         fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
                         __func__, type);
                         __func__, type);
-        goto out_free_aiocb;
+        return -EIO;
     }
     }
     io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
     io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
 
 
@@ -268,11 +268,53 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
         (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
         (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
         ioq_submit(s);
         ioq_submit(s);
     }
     }
-    return &laiocb->common;
 
 
-out_free_aiocb:
-    qemu_aio_unref(laiocb);
-    return NULL;
+    return 0;
+}
+
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+                                uint64_t offset, QEMUIOVector *qiov, int type)
+{
+    int ret;
+    struct qemu_laiocb laiocb = {
+        .co         = qemu_coroutine_self(),
+        .nbytes     = qiov->size,
+        .ctx        = s,
+        .is_read    = (type == QEMU_AIO_READ),
+        .qiov       = qiov,
+    };
+
+    ret = laio_do_submit(fd, &laiocb, offset, type);
+    if (ret < 0) {
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+    return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque, int type)
+{
+    struct qemu_laiocb *laiocb;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+    int ret;
+
+    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+    laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+    laiocb->ctx = s;
+    laiocb->ret = -EINPROGRESS;
+    laiocb->is_read = (type == QEMU_AIO_READ);
+    laiocb->qiov = qiov;
+
+    ret = laio_do_submit(fd, laiocb, offset, type);
+    if (ret < 0) {
+        qemu_aio_unref(laiocb);
+        return NULL;
+    }
+
+    return &laiocb->common;
 }
 }
 
 
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)

+ 36 - 19
block/mirror.c

@@ -44,6 +44,7 @@ typedef struct MirrorBlockJob {
     /* Used to block operations on the drive-mirror-replace target */
     /* Used to block operations on the drive-mirror-replace target */
     Error *replace_blocker;
     Error *replace_blocker;
     bool is_none_mode;
     bool is_none_mode;
+    BlockMirrorBackingMode backing_mode;
     BlockdevOnError on_source_error, on_target_error;
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
     bool synced;
     bool should_complete;
     bool should_complete;
@@ -157,8 +158,7 @@ static void mirror_read_complete(void *opaque, int ret)
         return;
         return;
     }
     }
     blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
     blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
-                    op->nb_sectors * BDRV_SECTOR_SIZE,
-                    mirror_write_complete, op);
+                    0, mirror_write_complete, op);
 }
 }
 
 
 static inline void mirror_clip_sectors(MirrorBlockJob *s,
 static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -186,8 +186,9 @@ static int mirror_cow_align(MirrorBlockJob *s,
     need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
     need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
                           s->cow_bitmap);
                           s->cow_bitmap);
     if (need_cow) {
     if (need_cow) {
-        bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
-                               &align_sector_num, &align_nb_sectors);
+        bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
+                                       *nb_sectors, &align_sector_num,
+                                       &align_nb_sectors);
     }
     }
 
 
     if (align_nb_sectors > max_sectors) {
     if (align_nb_sectors > max_sectors) {
@@ -274,8 +275,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
     s->sectors_in_flight += nb_sectors;
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
 
-    blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov,
-                   nb_sectors * BDRV_SECTOR_SIZE,
+    blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0,
                    mirror_read_complete, op);
                    mirror_read_complete, op);
     return ret;
     return ret;
 }
 }
@@ -386,8 +386,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
         } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
             int64_t target_sector_num;
             int64_t target_sector_num;
             int target_nb_sectors;
             int target_nb_sectors;
-            bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
-                                   &target_sector_num, &target_nb_sectors);
+            bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
+                                           io_sectors,  &target_sector_num,
+                                           &target_nb_sectors);
             if (target_sector_num == sector_num &&
             if (target_sector_num == sector_num &&
                 target_nb_sectors == io_sectors) {
                 target_nb_sectors == io_sectors) {
                 mirror_method = ret & BDRV_BLOCK_ZERO ?
                 mirror_method = ret & BDRV_BLOCK_ZERO ?
@@ -742,20 +743,26 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
 static void mirror_complete(BlockJob *job, Error **errp)
 static void mirror_complete(BlockJob *job, Error **errp)
 {
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    Error *local_err = NULL;
-    int ret;
+    BlockDriverState *src, *target;
+
+    src = blk_bs(job->blk);
+    target = blk_bs(s->target);
 
 
-    ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing",
-                                 &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return;
-    }
     if (!s->synced) {
     if (!s->synced) {
         error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
         error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
         return;
         return;
     }
     }
 
 
+    if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
+        int ret;
+
+        assert(!target->backing);
+        ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+        if (ret < 0) {
+            return;
+        }
+    }
+
     /* check the target bs is not blocked and block all operations on it */
     /* check the target bs is not blocked and block all operations on it */
     if (s->replaces) {
     if (s->replaces) {
         AioContext *replace_aio_context;
         AioContext *replace_aio_context;
@@ -777,6 +784,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
         aio_context_release(replace_aio_context);
         aio_context_release(replace_aio_context);
     }
     }
 
 
+    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+        BlockDriverState *backing = s->is_none_mode ? src : s->base;
+        if (backing_bs(target) != backing) {
+            bdrv_set_backing_hd(target, backing);
+        }
+    }
+
     s->should_complete = true;
     s->should_complete = true;
     block_job_enter(&s->common);
     block_job_enter(&s->common);
 }
 }
@@ -799,6 +813,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                              const char *replaces,
                              const char *replaces,
                              int64_t speed, uint32_t granularity,
                              int64_t speed, uint32_t granularity,
                              int64_t buf_size,
                              int64_t buf_size,
+                             BlockMirrorBackingMode backing_mode,
                              BlockdevOnError on_source_error,
                              BlockdevOnError on_source_error,
                              BlockdevOnError on_target_error,
                              BlockdevOnError on_target_error,
                              bool unmap,
                              bool unmap,
@@ -836,6 +851,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     s->on_source_error = on_source_error;
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
     s->on_target_error = on_target_error;
     s->is_none_mode = is_none_mode;
     s->is_none_mode = is_none_mode;
+    s->backing_mode = backing_mode;
     s->base = base;
     s->base = base;
     s->granularity = granularity;
     s->granularity = granularity;
     s->buf_size = ROUND_UP(buf_size, granularity);
     s->buf_size = ROUND_UP(buf_size, granularity);
@@ -859,7 +875,8 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   const char *replaces,
                   const char *replaces,
                   int64_t speed, uint32_t granularity, int64_t buf_size,
                   int64_t speed, uint32_t granularity, int64_t buf_size,
-                  MirrorSyncMode mode, BlockdevOnError on_source_error,
+                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+                  BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockdevOnError on_target_error,
                   bool unmap,
                   bool unmap,
                   BlockCompletionFunc *cb,
                   BlockCompletionFunc *cb,
@@ -875,7 +892,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
     base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
     mirror_start_job(bs, target, replaces,
     mirror_start_job(bs, target, replaces,
-                     speed, granularity, buf_size,
+                     speed, granularity, buf_size, backing_mode,
                      on_source_error, on_target_error, unmap, cb, opaque, errp,
                      on_source_error, on_target_error, unmap, cb, opaque, errp,
                      &mirror_job_driver, is_none_mode, base);
                      &mirror_job_driver, is_none_mode, base);
 }
 }
@@ -922,7 +939,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
         }
         }
     }
     }
 
 
-    mirror_start_job(bs, base, NULL, speed, 0, 0,
+    mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN,
                      on_error, on_error, false, cb, opaque, &local_err,
                      on_error, on_error, false, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
                      &commit_active_job_driver, false, base);
     if (local_err) {
     if (local_err) {

+ 20 - 0
block/null.c

@@ -12,6 +12,8 @@
 
 
 #include "qemu/osdep.h"
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
 #include "block/block_int.h"
 #include "block/block_int.h"
 
 
 #define NULL_OPT_LATENCY "latency-ns"
 #define NULL_OPT_LATENCY "latency-ns"
@@ -223,6 +225,20 @@ static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
     }
     }
 }
 }
 
 
+static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+    QINCREF(opts);
+    qdict_del(opts, "filename");
+
+    if (!qdict_size(opts)) {
+        snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s://",
+                 bs->drv->format_name);
+    }
+
+    qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
+    bs->full_open_options = opts;
+}
+
 static BlockDriver bdrv_null_co = {
 static BlockDriver bdrv_null_co = {
     .format_name            = "null-co",
     .format_name            = "null-co",
     .protocol_name          = "null-co",
     .protocol_name          = "null-co",
@@ -238,6 +254,8 @@ static BlockDriver bdrv_null_co = {
     .bdrv_reopen_prepare    = null_reopen_prepare,
     .bdrv_reopen_prepare    = null_reopen_prepare,
 
 
     .bdrv_co_get_block_status   = null_co_get_block_status,
     .bdrv_co_get_block_status   = null_co_get_block_status,
+
+    .bdrv_refresh_filename  = null_refresh_filename,
 };
 };
 
 
 static BlockDriver bdrv_null_aio = {
 static BlockDriver bdrv_null_aio = {
@@ -255,6 +273,8 @@ static BlockDriver bdrv_null_aio = {
     .bdrv_reopen_prepare    = null_reopen_prepare,
     .bdrv_reopen_prepare    = null_reopen_prepare,
 
 
     .bdrv_co_get_block_status   = null_co_get_block_status,
     .bdrv_co_get_block_status   = null_co_get_block_status,
+
+    .bdrv_refresh_filename  = null_refresh_filename,
 };
 };
 
 
 static void bdrv_null_init(void)
 static void bdrv_null_init(void)

+ 10 - 4
block/qcow.c

@@ -162,10 +162,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     if (s->crypt_method_header) {
     if (s->crypt_method_header) {
         if (bdrv_uses_whitelist() &&
         if (bdrv_uses_whitelist() &&
             s->crypt_method_header == QCOW_CRYPT_AES) {
             s->crypt_method_header == QCOW_CRYPT_AES) {
-            error_report("qcow built-in AES encryption is deprecated");
-            error_printf("Support for it will be removed in a future release.\n"
-                         "You can use 'qemu-img convert' to switch to an\n"
-                         "unencrypted qcow image, or a LUKS raw image.\n");
+            error_setg(errp,
+                       "Use of AES-CBC encrypted qcow images is no longer "
+                       "supported in system emulators");
+            error_append_hint(errp,
+                              "You can use 'qemu-img convert' to convert your "
+                              "image to an alternative supported format, such "
+                              "as unencrypted qcow, or raw with the LUKS "
+                              "format instead.\n");
+            ret = -ENOSYS;
+            goto fail;
         }
         }
 
 
         bs->encrypted = 1;
         bs->encrypted = 1;

+ 68 - 79
block/qcow2-cluster.c

@@ -390,22 +390,18 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
     return 0;
     return 0;
 }
 }
 
 
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
-                                     uint64_t start_sect,
-                                     uint64_t cluster_offset,
-                                     int n_start, int n_end)
+static int coroutine_fn do_perform_cow(BlockDriverState *bs,
+                                       uint64_t src_cluster_offset,
+                                       uint64_t cluster_offset,
+                                       int offset_in_cluster,
+                                       int bytes)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
     QEMUIOVector qiov;
     QEMUIOVector qiov;
     struct iovec iov;
     struct iovec iov;
-    int n, ret;
-
-    n = n_end - n_start;
-    if (n <= 0) {
-        return 0;
-    }
+    int ret;
 
 
-    iov.iov_len = n * BDRV_SECTOR_SIZE;
+    iov.iov_len = bytes;
     iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
     iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
     if (iov.iov_base == NULL) {
     if (iov.iov_base == NULL) {
         return -ENOMEM;
         return -ENOMEM;
@@ -424,17 +420,21 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
      * interface.  This avoids double I/O throttling and request tracking,
      * interface.  This avoids double I/O throttling and request tracking,
      * which can lead to deadlock when block layer copy-on-read is enabled.
      * which can lead to deadlock when block layer copy-on-read is enabled.
      */
      */
-    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+    ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
+                                  bytes, &qiov, 0);
     if (ret < 0) {
     if (ret < 0) {
         goto out;
         goto out;
     }
     }
 
 
     if (bs->encrypted) {
     if (bs->encrypted) {
         Error *err = NULL;
         Error *err = NULL;
+        int64_t sector = (cluster_offset + offset_in_cluster)
+                         >> BDRV_SECTOR_BITS;
         assert(s->cipher);
         assert(s->cipher);
-        if (qcow2_encrypt_sectors(s, start_sect + n_start,
-                                  iov.iov_base, iov.iov_base, n,
-                                  true, &err) < 0) {
+        assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
+        assert((bytes & ~BDRV_SECTOR_MASK) == 0);
+        if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
+                                  bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
             ret = -EIO;
             ret = -EIO;
             error_free(err);
             error_free(err);
             goto out;
             goto out;
@@ -442,14 +442,14 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
     }
     }
 
 
     ret = qcow2_pre_write_overlap_check(bs, 0,
     ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
+            cluster_offset + offset_in_cluster, bytes);
     if (ret < 0) {
     if (ret < 0) {
         goto out;
         goto out;
     }
     }
 
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
-                         &qiov);
+    ret = bdrv_co_pwritev(bs->file->bs, cluster_offset + offset_in_cluster,
+                          bytes, &qiov, 0);
     if (ret < 0) {
     if (ret < 0) {
         goto out;
         goto out;
     }
     }
@@ -464,47 +464,44 @@ out:
 /*
 /*
  * get_cluster_offset
  * get_cluster_offset
  *
  *
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
+ * For a given offset of the virtual disk, find the cluster type and offset in
+ * the qcow2 file. The offset is stored in *cluster_offset.
  *
  *
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
+ * On entry, *bytes is the maximum number of contiguous bytes starting at
+ * offset that we are interested in.
  *
  *
- * on exit, *num is the number of contiguous sectors we can read.
+ * On exit, *bytes is the number of bytes starting at offset that have the same
+ * cluster type and (if applicable) are stored contiguously in the image file.
+ * Compressed clusters are always returned one by one.
  *
  *
  * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
  * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
  * cases.
  * cases.
  */
  */
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset)
+                             unsigned int *bytes, uint64_t *cluster_offset)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
     unsigned int l2_index;
     uint64_t l1_index, l2_offset, *l2_table;
     uint64_t l1_index, l2_offset, *l2_table;
     int l1_bits, c;
     int l1_bits, c;
-    unsigned int index_in_cluster, nb_clusters;
-    uint64_t nb_available, nb_needed;
+    unsigned int offset_in_cluster, nb_clusters;
+    uint64_t bytes_available, bytes_needed;
     int ret;
     int ret;
 
 
-    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
-    nb_needed = *num + index_in_cluster;
+    offset_in_cluster = offset_into_cluster(s, offset);
+    bytes_needed = (uint64_t) *bytes + offset_in_cluster;
 
 
     l1_bits = s->l2_bits + s->cluster_bits;
     l1_bits = s->l2_bits + s->cluster_bits;
 
 
-    /* compute how many bytes there are between the offset and
-     * the end of the l1 entry
-     */
-
-    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
+    /* compute how many bytes there are between the start of the cluster
+     * containing offset and the end of the l1 entry */
+    bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
+                    + offset_in_cluster;
 
 
-    /* compute the number of available sectors */
-
-    nb_available = (nb_available >> 9) + index_in_cluster;
-
-    if (nb_needed > nb_available) {
-        nb_needed = nb_available;
+    if (bytes_needed > bytes_available) {
+        bytes_needed = bytes_available;
     }
     }
-    assert(nb_needed <= INT_MAX);
+    assert(bytes_needed <= INT_MAX);
 
 
     *cluster_offset = 0;
     *cluster_offset = 0;
 
 
@@ -542,7 +539,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     *cluster_offset = be64_to_cpu(l2_table[l2_index]);
     *cluster_offset = be64_to_cpu(l2_table[l2_index]);
 
 
     /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
     /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
-    nb_clusters = size_to_clusters(s, nb_needed << 9);
+    nb_clusters = size_to_clusters(s, bytes_needed);
 
 
     ret = qcow2_get_cluster_type(*cluster_offset);
     ret = qcow2_get_cluster_type(*cluster_offset);
     switch (ret) {
     switch (ret) {
@@ -589,13 +586,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 
 
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
 
 
-    nb_available = (c * s->cluster_sectors);
+    bytes_available = (c * s->cluster_size);
 
 
 out:
 out:
-    if (nb_available > nb_needed)
-        nb_available = nb_needed;
+    if (bytes_available > bytes_needed) {
+        bytes_available = bytes_needed;
+    }
 
 
-    *num = nb_available - index_in_cluster;
+    *bytes = bytes_available - offset_in_cluster;
 
 
     return ret;
     return ret;
 
 
@@ -741,14 +739,12 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
     int ret;
     int ret;
 
 
-    if (r->nb_sectors == 0) {
+    if (r->nb_bytes == 0) {
         return 0;
         return 0;
     }
     }
 
 
     qemu_co_mutex_unlock(&s->lock);
     qemu_co_mutex_unlock(&s->lock);
-    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
-                       r->offset / BDRV_SECTOR_SIZE,
-                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+    ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
     qemu_co_mutex_lock(&s->lock);
     qemu_co_mutex_lock(&s->lock);
 
 
     if (ret < 0) {
     if (ret < 0) {
@@ -810,13 +806,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     assert(l2_index + m->nb_clusters <= s->l2_size);
     assert(l2_index + m->nb_clusters <= s->l2_size);
     for (i = 0; i < m->nb_clusters; i++) {
     for (i = 0; i < m->nb_clusters; i++) {
         /* if two concurrent writes happen to the same unallocated cluster
         /* if two concurrent writes happen to the same unallocated cluster
-	 * each write allocates separate cluster and writes data concurrently.
-	 * The first one to complete updates l2 table with pointer to its
-	 * cluster the second one has to do RMW (which is done above by
-	 * copy_sectors()), update l2 table with its cluster pointer and free
-	 * old cluster. This is what this loop does */
-        if(l2_table[l2_index + i] != 0)
+         * each write allocates separate cluster and writes data concurrently.
+         * The first one to complete updates l2 table with pointer to its
+         * cluster the second one has to do RMW (which is done above by
+         * perform_cow()), update l2 table with its cluster pointer and free
+         * old cluster. This is what this loop does */
+        if (l2_table[l2_index + i] != 0) {
             old_cluster[j++] = l2_table[l2_index + i];
             old_cluster[j++] = l2_table[l2_index + i];
+        }
 
 
         l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
         l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
                     (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
                     (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
@@ -1198,25 +1195,20 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     /*
     /*
      * Save info needed for meta data update.
      * Save info needed for meta data update.
      *
      *
-     * requested_sectors: Number of sectors from the start of the first
+     * requested_bytes: Number of bytes from the start of the first
      * newly allocated cluster to the end of the (possibly shortened
      * newly allocated cluster to the end of the (possibly shortened
      * before) write request.
      * before) write request.
      *
      *
-     * avail_sectors: Number of sectors from the start of the first
+     * avail_bytes: Number of bytes from the start of the first
      * newly allocated to the end of the last newly allocated cluster.
      * newly allocated to the end of the last newly allocated cluster.
      *
      *
-     * nb_sectors: The number of sectors from the start of the first
+     * nb_bytes: The number of bytes from the start of the first
      * newly allocated cluster to the end of the area that the write
      * newly allocated cluster to the end of the area that the write
      * request actually writes to (excluding COW at the end)
      * request actually writes to (excluding COW at the end)
      */
      */
-    int requested_sectors =
-        (*bytes + offset_into_cluster(s, guest_offset))
-        >> BDRV_SECTOR_BITS;
-    int avail_sectors = nb_clusters
-                        << (s->cluster_bits - BDRV_SECTOR_BITS);
-    int alloc_n_start = offset_into_cluster(s, guest_offset)
-                        >> BDRV_SECTOR_BITS;
-    int nb_sectors = MIN(requested_sectors, avail_sectors);
+    uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
+    int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits);
+    int nb_bytes = MIN(requested_bytes, avail_bytes);
     QCowL2Meta *old_m = *m;
     QCowL2Meta *old_m = *m;
 
 
     *m = g_malloc0(sizeof(**m));
     *m = g_malloc0(sizeof(**m));
@@ -1227,23 +1219,21 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
         .alloc_offset   = alloc_cluster_offset,
         .alloc_offset   = alloc_cluster_offset,
         .offset         = start_of_cluster(s, guest_offset),
         .offset         = start_of_cluster(s, guest_offset),
         .nb_clusters    = nb_clusters,
         .nb_clusters    = nb_clusters,
-        .nb_available   = nb_sectors,
 
 
         .cow_start = {
         .cow_start = {
             .offset     = 0,
             .offset     = 0,
-            .nb_sectors = alloc_n_start,
+            .nb_bytes   = offset_into_cluster(s, guest_offset),
         },
         },
         .cow_end = {
         .cow_end = {
-            .offset     = nb_sectors * BDRV_SECTOR_SIZE,
-            .nb_sectors = avail_sectors - nb_sectors,
+            .offset     = nb_bytes,
+            .nb_bytes   = avail_bytes - nb_bytes,
         },
         },
     };
     };
     qemu_co_queue_init(&(*m)->dependent_requests);
     qemu_co_queue_init(&(*m)->dependent_requests);
     QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
     QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
 
 
     *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
     *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
-    *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
-                         - offset_into_cluster(s, guest_offset));
+    *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
     assert(*bytes != 0);
     assert(*bytes != 0);
 
 
     return 1;
     return 1;
@@ -1275,7 +1265,8 @@ fail:
  * Return 0 on success and -errno in error cases
  * Return 0 on success and -errno in error cases
  */
  */
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *host_offset, QCowL2Meta **m)
+                               unsigned int *bytes, uint64_t *host_offset,
+                               QCowL2Meta **m)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
     uint64_t start, remaining;
     uint64_t start, remaining;
@@ -1283,13 +1274,11 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     uint64_t cur_bytes;
     uint64_t cur_bytes;
     int ret;
     int ret;
 
 
-    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
-
-    assert((offset & ~BDRV_SECTOR_MASK) == 0);
+    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);
 
 
 again:
 again:
     start = offset;
     start = offset;
-    remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
+    remaining = *bytes;
     cluster_offset = 0;
     cluster_offset = 0;
     *host_offset = 0;
     *host_offset = 0;
     cur_bytes = 0;
     cur_bytes = 0;
@@ -1375,8 +1364,8 @@ again:
         }
         }
     }
     }
 
 
-    *num -= remaining >> BDRV_SECTOR_BITS;
-    assert(*num > 0);
+    *bytes -= remaining;
+    assert(*bytes > 0);
     assert(*host_offset != 0);
     assert(*host_offset != 0);
 
 
     return 0;
     return 0;

+ 116 - 123
block/qcow2.c

@@ -968,13 +968,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     if (s->crypt_method_header) {
     if (s->crypt_method_header) {
         if (bdrv_uses_whitelist() &&
         if (bdrv_uses_whitelist() &&
             s->crypt_method_header == QCOW_CRYPT_AES) {
             s->crypt_method_header == QCOW_CRYPT_AES) {
-            error_report("qcow2 built-in AES encryption is deprecated");
-            error_printf("Support for it will be removed in a future release.\n"
-                         "You can use 'qemu-img convert' to switch to an\n"
-                         "unencrypted qcow2 image, or a LUKS raw image.\n");
+            error_setg(errp,
+                       "Use of AES-CBC encrypted qcow2 images is no longer "
+                       "supported in system emulators");
+            error_append_hint(errp,
+                              "You can use 'qemu-img convert' to convert your "
+                              "image to an alternative supported format, such "
+                              "as unencrypted qcow2, or raw with the LUKS "
+                              "format instead.\n");
+            ret = -ENOSYS;
+            goto fail;
         }
         }
 
 
         bs->encrypted = 1;
         bs->encrypted = 1;
+
+        /* Encryption works on a sector granularity */
+        bs->request_alignment = BDRV_SECTOR_SIZE;
     }
     }
 
 
     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1331,16 +1340,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
     uint64_t cluster_offset;
     uint64_t cluster_offset;
     int index_in_cluster, ret;
     int index_in_cluster, ret;
+    unsigned int bytes;
     int64_t status = 0;
     int64_t status = 0;
 
 
-    *pnum = nb_sectors;
+    bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
     qemu_co_mutex_lock(&s->lock);
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+    ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+                                   &cluster_offset);
     qemu_co_mutex_unlock(&s->lock);
     qemu_co_mutex_unlock(&s->lock);
     if (ret < 0) {
     if (ret < 0) {
         return ret;
         return ret;
     }
     }
 
 
+    *pnum = bytes >> BDRV_SECTOR_BITS;
+
     if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
     if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
         !s->cipher) {
         !s->cipher) {
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1358,28 +1371,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
 
 
 /* handle reading after the end of the backing file */
 /* handle reading after the end of the backing file */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                  int64_t sector_num, int nb_sectors)
+                        int64_t offset, int bytes)
 {
 {
+    uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
     int n1;
     int n1;
-    if ((sector_num + nb_sectors) <= bs->total_sectors)
-        return nb_sectors;
-    if (sector_num >= bs->total_sectors)
+
+    if ((offset + bytes) <= bs_size) {
+        return bytes;
+    }
+
+    if (offset >= bs_size) {
         n1 = 0;
         n1 = 0;
-    else
-        n1 = bs->total_sectors - sector_num;
+    } else {
+        n1 = bs_size - offset;
+    }
 
 
-    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+    qemu_iovec_memset(qiov, n1, 0, bytes - n1);
 
 
     return n1;
     return n1;
 }
 }
 
 
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
-                          int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                        uint64_t bytes, QEMUIOVector *qiov,
+                                        int flags)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster, n1;
+    int offset_in_cluster, n1;
     int ret;
     int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
     uint64_t cluster_offset = 0;
     uint64_t bytes_done = 0;
     uint64_t bytes_done = 0;
     QEMUIOVector hd_qiov;
     QEMUIOVector hd_qiov;
@@ -1389,26 +1408,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
 
 
     qemu_co_mutex_lock(&s->lock);
     qemu_co_mutex_lock(&s->lock);
 
 
-    while (remaining_sectors != 0) {
+    while (bytes != 0) {
 
 
         /* prepare next request */
         /* prepare next request */
-        cur_nr_sectors = remaining_sectors;
+        cur_bytes = MIN(bytes, INT_MAX);
         if (s->cipher) {
         if (s->cipher) {
-            cur_nr_sectors = MIN(cur_nr_sectors,
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+            cur_bytes = MIN(cur_bytes,
+                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
         }
         }
 
 
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset);
+        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
         if (ret < 0) {
         if (ret < 0) {
             goto fail;
             goto fail;
         }
         }
 
 
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        offset_in_cluster = offset_into_cluster(s, offset);
 
 
         qemu_iovec_reset(&hd_qiov);
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
 
         switch (ret) {
         switch (ret) {
         case QCOW2_CLUSTER_UNALLOCATED:
         case QCOW2_CLUSTER_UNALLOCATED:
@@ -1416,18 +1433,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             if (bs->backing) {
             if (bs->backing) {
                 /* read from the base image */
                 /* read from the base image */
                 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
                 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
-                    sector_num, cur_nr_sectors);
+                                         offset, cur_bytes);
                 if (n1 > 0) {
                 if (n1 > 0) {
                     QEMUIOVector local_qiov;
                     QEMUIOVector local_qiov;
 
 
                     qemu_iovec_init(&local_qiov, hd_qiov.niov);
                     qemu_iovec_init(&local_qiov, hd_qiov.niov);
-                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
-                                      n1 * BDRV_SECTOR_SIZE);
+                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
 
 
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                     qemu_co_mutex_unlock(&s->lock);
                     qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                        n1, &local_qiov);
+                    ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
+                                         &local_qiov, 0);
                     qemu_co_mutex_lock(&s->lock);
                     qemu_co_mutex_lock(&s->lock);
 
 
                     qemu_iovec_destroy(&local_qiov);
                     qemu_iovec_destroy(&local_qiov);
@@ -1438,12 +1454,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 }
                 }
             } else {
             } else {
                 /* Note: in this case, no need to wait */
                 /* Note: in this case, no need to wait */
-                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             }
             }
             break;
             break;
 
 
         case QCOW2_CLUSTER_ZERO:
         case QCOW2_CLUSTER_ZERO:
-            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             break;
             break;
 
 
         case QCOW2_CLUSTER_COMPRESSED:
         case QCOW2_CLUSTER_COMPRESSED:
@@ -1454,8 +1470,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             }
             }
 
 
             qemu_iovec_from_buf(&hd_qiov, 0,
             qemu_iovec_from_buf(&hd_qiov, 0,
-                s->cluster_cache + index_in_cluster * 512,
-                512 * cur_nr_sectors);
+                                s->cluster_cache + offset_in_cluster,
+                                cur_bytes);
             break;
             break;
 
 
         case QCOW2_CLUSTER_NORMAL:
         case QCOW2_CLUSTER_NORMAL:
@@ -1482,34 +1498,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                     }
                     }
                 }
                 }
 
 
-                assert(cur_nr_sectors <=
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                 qemu_iovec_reset(&hd_qiov);
                 qemu_iovec_reset(&hd_qiov);
-                qemu_iovec_add(&hd_qiov, cluster_data,
-                    512 * cur_nr_sectors);
+                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
             }
             }
 
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             qemu_co_mutex_unlock(&s->lock);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
-                                (cluster_offset >> 9) + index_in_cluster,
-                                cur_nr_sectors, &hd_qiov);
+            ret = bdrv_co_preadv(bs->file->bs,
+                                 cluster_offset + offset_in_cluster,
+                                 cur_bytes, &hd_qiov, 0);
             qemu_co_mutex_lock(&s->lock);
             qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
             if (ret < 0) {
                 goto fail;
                 goto fail;
             }
             }
             if (bs->encrypted) {
             if (bs->encrypted) {
                 assert(s->cipher);
                 assert(s->cipher);
+                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                 Error *err = NULL;
                 Error *err = NULL;
-                if (qcow2_encrypt_sectors(s, sector_num,  cluster_data,
-                                          cluster_data, cur_nr_sectors, false,
-                                          &err) < 0) {
+                if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+                                          cluster_data, cluster_data,
+                                          cur_bytes >> BDRV_SECTOR_BITS,
+                                          false, &err) < 0) {
                     error_free(err);
                     error_free(err);
                     ret = -EIO;
                     ret = -EIO;
                     goto fail;
                     goto fail;
                 }
                 }
-                qemu_iovec_from_buf(qiov, bytes_done,
-                    cluster_data, 512 * cur_nr_sectors);
+                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
             }
             }
             break;
             break;
 
 
@@ -1519,9 +1535,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             goto fail;
             goto fail;
         }
         }
 
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        bytes_done += cur_bytes;
     }
     }
     ret = 0;
     ret = 0;
 
 
@@ -1534,23 +1550,21 @@ fail:
     return ret;
     return ret;
 }
 }
 
 
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
-                           int64_t sector_num,
-                           int remaining_sectors,
-                           QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                         uint64_t bytes, QEMUIOVector *qiov,
+                                         int flags)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster;
+    int offset_in_cluster;
     int ret;
     int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    unsigned int cur_bytes; /* number of sectors in current iteration */
     uint64_t cluster_offset;
     uint64_t cluster_offset;
     QEMUIOVector hd_qiov;
     QEMUIOVector hd_qiov;
     uint64_t bytes_done = 0;
     uint64_t bytes_done = 0;
     uint8_t *cluster_data = NULL;
     uint8_t *cluster_data = NULL;
     QCowL2Meta *l2meta = NULL;
     QCowL2Meta *l2meta = NULL;
 
 
-    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
-                                 remaining_sectors);
+    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
 
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
 
@@ -1558,22 +1572,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
 
     qemu_co_mutex_lock(&s->lock);
     qemu_co_mutex_lock(&s->lock);
 
 
-    while (remaining_sectors != 0) {
+    while (bytes != 0) {
 
 
         l2meta = NULL;
         l2meta = NULL;
 
 
         trace_qcow2_writev_start_part(qemu_coroutine_self());
         trace_qcow2_writev_start_part(qemu_coroutine_self());
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        cur_nr_sectors = remaining_sectors;
-        if (bs->encrypted &&
-            cur_nr_sectors >
-            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
-            cur_nr_sectors =
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
+        offset_in_cluster = offset_into_cluster(s, offset);
+        cur_bytes = MIN(bytes, INT_MAX);
+        if (bs->encrypted) {
+            cur_bytes = MIN(cur_bytes,
+                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+                            - offset_in_cluster);
         }
         }
 
 
-        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset, &l2meta);
+        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
+                                         &cluster_offset, &l2meta);
         if (ret < 0) {
         if (ret < 0) {
             goto fail;
             goto fail;
         }
         }
@@ -1581,8 +1594,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         assert((cluster_offset & 511) == 0);
         assert((cluster_offset & 511) == 0);
 
 
         qemu_iovec_reset(&hd_qiov);
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
 
         if (bs->encrypted) {
         if (bs->encrypted) {
             Error *err = NULL;
             Error *err = NULL;
@@ -1601,8 +1613,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
 
 
-            if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
-                                      cluster_data, cur_nr_sectors,
+            if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+                                      cluster_data, cluster_data,
+                                      cur_bytes >>BDRV_SECTOR_BITS,
                                       true, &err) < 0) {
                                       true, &err) < 0) {
                 error_free(err);
                 error_free(err);
                 ret = -EIO;
                 ret = -EIO;
@@ -1610,13 +1623,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             }
             }
 
 
             qemu_iovec_reset(&hd_qiov);
             qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_add(&hd_qiov, cluster_data,
-                cur_nr_sectors * 512);
+            qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
         }
         }
 
 
         ret = qcow2_pre_write_overlap_check(bs, 0,
         ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
-                cur_nr_sectors * BDRV_SECTOR_SIZE);
+                cluster_offset + offset_in_cluster, cur_bytes);
         if (ret < 0) {
         if (ret < 0) {
             goto fail;
             goto fail;
         }
         }
@@ -1624,10 +1635,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         qemu_co_mutex_unlock(&s->lock);
         qemu_co_mutex_unlock(&s->lock);
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
         trace_qcow2_writev_data(qemu_coroutine_self(),
-                                (cluster_offset >> 9) + index_in_cluster);
-        ret = bdrv_co_writev(bs->file->bs,
-                             (cluster_offset >> 9) + index_in_cluster,
-                             cur_nr_sectors, &hd_qiov);
+                                cluster_offset + offset_in_cluster);
+        ret = bdrv_co_pwritev(bs->file->bs,
+                              cluster_offset + offset_in_cluster,
+                              cur_bytes, &hd_qiov, 0);
         qemu_co_mutex_lock(&s->lock);
         qemu_co_mutex_lock(&s->lock);
         if (ret < 0) {
         if (ret < 0) {
             goto fail;
             goto fail;
@@ -1653,10 +1664,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             l2meta = next;
             l2meta = next;
         }
         }
 
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
-        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        bytes_done += cur_bytes;
+        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
     }
     }
     ret = 0;
     ret = 0;
 
 
@@ -1998,19 +2009,19 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
 
 
 static int preallocate(BlockDriverState *bs)
 static int preallocate(BlockDriverState *bs)
 {
 {
-    uint64_t nb_sectors;
+    uint64_t bytes;
     uint64_t offset;
     uint64_t offset;
     uint64_t host_offset = 0;
     uint64_t host_offset = 0;
-    int num;
+    unsigned int cur_bytes;
     int ret;
     int ret;
     QCowL2Meta *meta;
     QCowL2Meta *meta;
 
 
-    nb_sectors = bdrv_nb_sectors(bs);
+    bytes = bdrv_getlength(bs);
     offset = 0;
     offset = 0;
 
 
-    while (nb_sectors) {
-        num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
-        ret = qcow2_alloc_cluster_offset(bs, offset, &num,
+    while (bytes) {
+        cur_bytes = MIN(bytes, INT_MAX);
+        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                          &host_offset, &meta);
                                          &host_offset, &meta);
         if (ret < 0) {
         if (ret < 0) {
             return ret;
             return ret;
@@ -2036,8 +2047,8 @@ static int preallocate(BlockDriverState *bs)
 
 
         /* TODO Preallocate data if requested */
         /* TODO Preallocate data if requested */
 
 
-        nb_sectors -= num;
-        offset += num << BDRV_SECTOR_BITS;
+        bytes -= cur_bytes;
+        offset += cur_bytes;
     }
     }
 
 
     /*
     /*
@@ -2046,11 +2057,9 @@ static int preallocate(BlockDriverState *bs)
      * EOF). Extend the image to the last allocated sector.
      * EOF). Extend the image to the last allocated sector.
      */
      */
     if (host_offset != 0) {
     if (host_offset != 0) {
-        uint8_t buf[BDRV_SECTOR_SIZE];
-        memset(buf, 0, BDRV_SECTOR_SIZE);
-        ret = bdrv_write(bs->file->bs,
-                         (host_offset >> BDRV_SECTOR_BITS) + num - 1,
-                         buf, 1);
+        uint8_t data = 0;
+        ret = bdrv_pwrite(bs->file->bs, (host_offset + cur_bytes) - 1,
+                          &data, 1);
         if (ret < 0) {
         if (ret < 0) {
             return ret;
             return ret;
         }
         }
@@ -2435,7 +2444,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
     if (head || tail) {
     if (head || tail) {
         int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
         int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
         uint64_t off;
         uint64_t off;
-        int nr;
+        unsigned int nr;
 
 
         assert(head + count <= s->cluster_size);
         assert(head + count <= s->cluster_size);
 
 
@@ -2452,7 +2461,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
         /* We can have new write after previous check */
         /* We can have new write after previous check */
         offset = cl_start << BDRV_SECTOR_BITS;
         offset = cl_start << BDRV_SECTOR_BITS;
         count = s->cluster_size;
         count = s->cluster_size;
-        nr = s->cluster_sectors;
+        nr = s->cluster_size;
         ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
         ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
         if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
         if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
             qemu_co_mutex_unlock(&s->lock);
             qemu_co_mutex_unlock(&s->lock);
@@ -2900,36 +2909,20 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
                               int64_t pos)
                               int64_t pos)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
-    int64_t total_sectors = bs->total_sectors;
-    bool zero_beyond_eof = bs->zero_beyond_eof;
-    int ret;
 
 
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
-    bs->zero_beyond_eof = false;
-    ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
-    bs->zero_beyond_eof = zero_beyond_eof;
-
-    /* bdrv_co_do_writev will have increased the total_sectors value to include
-     * the VM state - the VM state is however not an actual part of the block
-     * device, therefore, we need to restore the old value. */
-    bs->total_sectors = total_sectors;
-
-    return ret;
+    return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
+                                    qiov->size, qiov, 0);
 }
 }
 
 
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                              int64_t pos, int size)
+static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+                              int64_t pos)
 {
 {
     BDRVQcow2State *s = bs->opaque;
     BDRVQcow2State *s = bs->opaque;
-    bool zero_beyond_eof = bs->zero_beyond_eof;
-    int ret;
 
 
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
-    bs->zero_beyond_eof = false;
-    ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
-    bs->zero_beyond_eof = zero_beyond_eof;
-
-    return ret;
+    return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
+                                   qiov->size, qiov, 0);
 }
 }
 
 
 /*
 /*
@@ -3368,8 +3361,8 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_co_get_block_status = qcow2_co_get_block_status,
     .bdrv_co_get_block_status = qcow2_co_get_block_status,
     .bdrv_set_key       = qcow2_set_key,
     .bdrv_set_key       = qcow2_set_key,
 
 
-    .bdrv_co_readv          = qcow2_co_readv,
-    .bdrv_co_writev         = qcow2_co_writev,
+    .bdrv_co_preadv         = qcow2_co_preadv,
+    .bdrv_co_pwritev        = qcow2_co_pwritev,
     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
 
 
     .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
     .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,

+ 6 - 12
block/qcow2.h

@@ -302,8 +302,8 @@ typedef struct Qcow2COWRegion {
      */
      */
     uint64_t    offset;
     uint64_t    offset;
 
 
-    /** Number of sectors to copy */
-    int         nb_sectors;
+    /** Number of bytes to copy */
+    int         nb_bytes;
 } Qcow2COWRegion;
 } Qcow2COWRegion;
 
 
 /**
 /**
@@ -318,12 +318,6 @@ typedef struct QCowL2Meta
     /** Host offset of the first newly allocated cluster */
     /** Host offset of the first newly allocated cluster */
     uint64_t alloc_offset;
     uint64_t alloc_offset;
 
 
-    /**
-     * Number of sectors from the start of the first allocated cluster to
-     * the end of the (possibly shortened) request
-     */
-    int nb_available;
-
     /** Number of newly allocated clusters */
     /** Number of newly allocated clusters */
     int nb_clusters;
     int nb_clusters;
 
 
@@ -471,8 +465,7 @@ static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
 
 
 static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
 static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
 {
 {
-    return m->offset + m->cow_end.offset
-        + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
+    return m->offset + m->cow_end.offset + m->cow_end.nb_bytes;
 }
 }
 
 
 static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
 static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
@@ -544,9 +537,10 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                           int nb_sectors, bool enc, Error **errp);
                           int nb_sectors, bool enc, Error **errp);
 
 
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset);
+                             unsigned int *bytes, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *host_offset, QCowL2Meta **m);
+                               unsigned int *bytes, uint64_t *host_offset,
+                               QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          uint64_t offset,
                                          uint64_t offset,
                                          int compressed_size);
                                          int compressed_size);

+ 3 - 0
block/raw-aio.h

@@ -15,6 +15,7 @@
 #ifndef QEMU_RAW_AIO_H
 #ifndef QEMU_RAW_AIO_H
 #define QEMU_RAW_AIO_H
 #define QEMU_RAW_AIO_H
 
 
+#include "qemu/coroutine.h"
 #include "qemu/iov.h"
 #include "qemu/iov.h"
 
 
 /* AIO request types */
 /* AIO request types */
@@ -38,6 +39,8 @@
 typedef struct LinuxAioState LinuxAioState;
 typedef struct LinuxAioState LinuxAioState;
 LinuxAioState *laio_init(void);
 LinuxAioState *laio_init(void);
 void laio_cleanup(LinuxAioState *s);
 void laio_cleanup(LinuxAioState *s);
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+                                uint64_t offset, QEMUIOVector *qiov, int type);
 BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
 BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockCompletionFunc *cb, void *opaque, int type);
         BlockCompletionFunc *cb, void *opaque, int type);

+ 30 - 32
block/raw-posix.c

@@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
     return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
 }
 
 
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+                                   uint64_t bytes, QEMUIOVector *qiov, int type)
 {
 {
     BDRVRawState *s = bs->opaque;
     BDRVRawState *s = bs->opaque;
 
 
     if (fd_open(bs) < 0)
     if (fd_open(bs) < 0)
-        return NULL;
+        return -EIO;
 
 
     /*
     /*
      * Check if the underlying device requires requests to be aligned,
      * Check if the underlying device requires requests to be aligned,
@@ -1345,14 +1344,28 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
             type |= QEMU_AIO_MISALIGNED;
             type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
 #ifdef CONFIG_LINUX_AIO
         } else if (s->use_aio) {
         } else if (s->use_aio) {
-            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
-                               nb_sectors, cb, opaque, type);
+            assert(qiov->size == bytes);
+            return laio_co_submit(bs, s->aio_ctx, s->fd, offset, qiov, type);
 #endif
 #endif
         }
         }
     }
     }
 
 
-    return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
-                       cb, opaque, type);
+    return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
+}
+
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                      uint64_t bytes, QEMUIOVector *qiov,
+                                      int flags)
+{
+    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                       uint64_t bytes, QEMUIOVector *qiov,
+                                       int flags)
+{
+    assert(flags == 0);
+    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
 }
 }
 
 
 static void raw_aio_plug(BlockDriverState *bs)
 static void raw_aio_plug(BlockDriverState *bs)
@@ -1375,22 +1388,6 @@ static void raw_aio_unplug(BlockDriverState *bs)
 #endif
 #endif
 }
 }
 
 
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_WRITE);
-}
-
 static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
 static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
         BlockCompletionFunc *cb, void *opaque)
         BlockCompletionFunc *cb, void *opaque)
 {
 {
@@ -1957,8 +1954,8 @@ BlockDriver bdrv_file = {
     .bdrv_co_get_block_status = raw_co_get_block_status,
     .bdrv_co_get_block_status = raw_co_get_block_status,
     .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
     .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
 
 
-    .bdrv_aio_readv = raw_aio_readv,
-    .bdrv_aio_writev = raw_aio_writev,
+    .bdrv_co_preadv         = raw_co_preadv,
+    .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_aio_flush = raw_aio_flush,
     .bdrv_aio_flush = raw_aio_flush,
     .bdrv_aio_discard = raw_aio_discard,
     .bdrv_aio_discard = raw_aio_discard,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_refresh_limits = raw_refresh_limits,
@@ -2405,8 +2402,8 @@ static BlockDriver bdrv_host_device = {
     .create_opts         = &raw_create_opts,
     .create_opts         = &raw_create_opts,
     .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
     .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
 
 
-    .bdrv_aio_readv	= raw_aio_readv,
-    .bdrv_aio_writev	= raw_aio_writev,
+    .bdrv_co_preadv         = raw_co_preadv,
+    .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_aio_discard   = hdev_aio_discard,
     .bdrv_aio_discard   = hdev_aio_discard,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_refresh_limits = raw_refresh_limits,
@@ -2535,8 +2532,9 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_create         = hdev_create,
     .bdrv_create         = hdev_create,
     .create_opts         = &raw_create_opts,
     .create_opts         = &raw_create_opts,
 
 
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+
+    .bdrv_co_preadv         = raw_co_preadv,
+    .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
     .bdrv_io_plug = raw_aio_plug,
@@ -2670,8 +2668,8 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_create        = hdev_create,
     .bdrv_create        = hdev_create,
     .create_opts        = &raw_create_opts,
     .create_opts        = &raw_create_opts,
 
 
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_co_preadv         = raw_co_preadv,
+    .bdrv_co_pwritev        = raw_co_pwritev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
     .bdrv_io_plug = raw_aio_plug,

+ 23 - 15
block/rbd.c

@@ -290,7 +290,8 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
             if (only_read_conf_file) {
             if (only_read_conf_file) {
                 ret = rados_conf_read_file(cluster, value);
                 ret = rados_conf_read_file(cluster, value);
                 if (ret < 0) {
                 if (ret < 0) {
-                    error_setg(errp, "error reading conf file %s", value);
+                    error_setg_errno(errp, -ret, "error reading conf file %s",
+                                     value);
                     break;
                     break;
                 }
                 }
             }
             }
@@ -299,7 +300,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
         } else if (!only_read_conf_file) {
         } else if (!only_read_conf_file) {
             ret = rados_conf_set(cluster, name, value);
             ret = rados_conf_set(cluster, name, value);
             if (ret < 0) {
             if (ret < 0) {
-                error_setg(errp, "invalid conf option %s", name);
+                error_setg_errno(errp, -ret, "invalid conf option %s", name);
                 ret = -EINVAL;
                 ret = -EINVAL;
                 break;
                 break;
             }
             }
@@ -354,9 +355,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
     }
     }
 
 
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
-    if (rados_create(&cluster, clientname) < 0) {
-        error_setg(errp, "error initializing");
-        return -EIO;
+    ret = rados_create(&cluster, clientname);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "error initializing");
+        return ret;
     }
     }
 
 
     if (strstr(conf, "conf=") == NULL) {
     if (strstr(conf, "conf=") == NULL) {
@@ -381,21 +383,27 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
         return -EIO;
         return -EIO;
     }
     }
 
 
-    if (rados_connect(cluster) < 0) {
-        error_setg(errp, "error connecting");
+    ret = rados_connect(cluster);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "error connecting");
         rados_shutdown(cluster);
         rados_shutdown(cluster);
-        return -EIO;
+        return ret;
     }
     }
 
 
-    if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
-        error_setg(errp, "error opening pool %s", pool);
+    ret = rados_ioctx_create(cluster, pool, &io_ctx);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "error opening pool %s", pool);
         rados_shutdown(cluster);
         rados_shutdown(cluster);
-        return -EIO;
+        return ret;
     }
     }
 
 
     ret = rbd_create(io_ctx, name, bytes, &obj_order);
     ret = rbd_create(io_ctx, name, bytes, &obj_order);
     rados_ioctx_destroy(io_ctx);
     rados_ioctx_destroy(io_ctx);
     rados_shutdown(cluster);
     rados_shutdown(cluster);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "error rbd create");
+        return ret;
+    }
 
 
     return ret;
     return ret;
 }
 }
@@ -500,7 +508,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
     r = rados_create(&s->cluster, clientname);
     r = rados_create(&s->cluster, clientname);
     if (r < 0) {
     if (r < 0) {
-        error_setg(errp, "error initializing");
+        error_setg_errno(errp, -r, "error initializing");
         goto failed_opts;
         goto failed_opts;
     }
     }
 
 
@@ -546,19 +554,19 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 
 
     r = rados_connect(s->cluster);
     r = rados_connect(s->cluster);
     if (r < 0) {
     if (r < 0) {
-        error_setg(errp, "error connecting");
+        error_setg_errno(errp, -r, "error connecting");
         goto failed_shutdown;
         goto failed_shutdown;
     }
     }
 
 
     r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
     r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
     if (r < 0) {
     if (r < 0) {
-        error_setg(errp, "error opening pool %s", pool);
+        error_setg_errno(errp, -r, "error opening pool %s", pool);
         goto failed_shutdown;
         goto failed_shutdown;
     }
     }
 
 
     r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
     r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
     if (r < 0) {
     if (r < 0) {
-        error_setg(errp, "error reading header from %s", s->name);
+        error_setg_errno(errp, -r, "error reading header from %s", s->name);
         goto failed_open;
         goto failed_open;
     }
     }
 
 

+ 10 - 3
block/sheepdog.c

@@ -2784,12 +2784,19 @@ static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
     return ret;
     return ret;
 }
 }
 
 
-static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
-                           int64_t pos, int size)
+static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+                           int64_t pos)
 {
 {
     BDRVSheepdogState *s = bs->opaque;
     BDRVSheepdogState *s = bs->opaque;
+    void *buf;
+    int ret;
 
 
-    return do_load_save_vmstate(s, data, pos, size, 1);
+    buf = qemu_blockalign(bs, qiov->size);
+    ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1);
+    qemu_iovec_from_buf(qiov, 0, buf, qiov->size);
+    qemu_vfree(buf);
+
+    return ret;
 }
 }
 
 
 
 

+ 25 - 17
blockdev.c

@@ -2544,6 +2544,7 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
     BlockBackend *blk;
     BlockBackend *blk;
     BlockDriverState *medium_bs = NULL;
     BlockDriverState *medium_bs = NULL;
     int bdrv_flags;
     int bdrv_flags;
+    int rc;
     QDict *options = NULL;
     QDict *options = NULL;
     Error *err = NULL;
     Error *err = NULL;
 
 
@@ -2598,11 +2599,13 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
         goto fail;
         goto fail;
     }
     }
 
 
-    qmp_blockdev_open_tray(device, false, false, &err);
-    if (err) {
+    rc = do_open_tray(device, false, &err);
+    if (rc && rc != -ENOSYS) {
         error_propagate(errp, err);
         error_propagate(errp, err);
         goto fail;
         goto fail;
     }
     }
+    error_free(err);
+    err = NULL;
 
 
     qmp_x_blockdev_remove_medium(device, &err);
     qmp_x_blockdev_remove_medium(device, &err);
     if (err) {
     if (err) {
@@ -3423,6 +3426,7 @@ static void blockdev_mirror_common(BlockDriverState *bs,
                                    BlockDriverState *target,
                                    BlockDriverState *target,
                                    bool has_replaces, const char *replaces,
                                    bool has_replaces, const char *replaces,
                                    enum MirrorSyncMode sync,
                                    enum MirrorSyncMode sync,
+                                   BlockMirrorBackingMode backing_mode,
                                    bool has_speed, int64_t speed,
                                    bool has_speed, int64_t speed,
                                    bool has_granularity, uint32_t granularity,
                                    bool has_granularity, uint32_t granularity,
                                    bool has_buf_size, int64_t buf_size,
                                    bool has_buf_size, int64_t buf_size,
@@ -3480,7 +3484,7 @@ static void blockdev_mirror_common(BlockDriverState *bs,
      */
      */
     mirror_start(bs, target,
     mirror_start(bs, target,
                  has_replaces ? replaces : NULL,
                  has_replaces ? replaces : NULL,
-                 speed, granularity, buf_size, sync,
+                 speed, granularity, buf_size, sync, backing_mode,
                  on_source_error, on_target_error, unmap,
                  on_source_error, on_target_error, unmap,
                  block_job_cb, bs, errp);
                  block_job_cb, bs, errp);
 }
 }
@@ -3503,6 +3507,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     BlockBackend *blk;
     BlockBackend *blk;
     BlockDriverState *source, *target_bs;
     BlockDriverState *source, *target_bs;
     AioContext *aio_context;
     AioContext *aio_context;
+    BlockMirrorBackingMode backing_mode;
     Error *local_err = NULL;
     Error *local_err = NULL;
     QDict *options = NULL;
     QDict *options = NULL;
     int flags;
     int flags;
@@ -3576,6 +3581,12 @@ void qmp_drive_mirror(const char *device, const char *target,
         }
         }
     }
     }
 
 
+    if (mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
+        backing_mode = MIRROR_SOURCE_BACKING_CHAIN;
+    } else {
+        backing_mode = MIRROR_OPEN_BACKING_CHAIN;
+    }
+
     if ((sync == MIRROR_SYNC_MODE_FULL || !source)
     if ((sync == MIRROR_SYNC_MODE_FULL || !source)
         && mode != NEW_IMAGE_MODE_EXISTING)
         && mode != NEW_IMAGE_MODE_EXISTING)
     {
     {
@@ -3624,7 +3635,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     bdrv_set_aio_context(target_bs, aio_context);
     bdrv_set_aio_context(target_bs, aio_context);
 
 
     blockdev_mirror_common(bs, target_bs,
     blockdev_mirror_common(bs, target_bs,
-                           has_replaces, replaces, sync,
+                           has_replaces, replaces, sync, backing_mode,
                            has_speed, speed,
                            has_speed, speed,
                            has_granularity, granularity,
                            has_granularity, granularity,
                            has_buf_size, buf_size,
                            has_buf_size, buf_size,
@@ -3656,6 +3667,7 @@ void qmp_blockdev_mirror(const char *device, const char *target,
     BlockBackend *blk;
     BlockBackend *blk;
     BlockDriverState *target_bs;
     BlockDriverState *target_bs;
     AioContext *aio_context;
     AioContext *aio_context;
+    BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
     Error *local_err = NULL;
     Error *local_err = NULL;
 
 
     blk = blk_by_name(device);
     blk = blk_by_name(device);
@@ -3681,7 +3693,7 @@ void qmp_blockdev_mirror(const char *device, const char *target,
     bdrv_set_aio_context(target_bs, aio_context);
     bdrv_set_aio_context(target_bs, aio_context);
 
 
     blockdev_mirror_common(bs, target_bs,
     blockdev_mirror_common(bs, target_bs,
-                           has_replaces, replaces, sync,
+                           has_replaces, replaces, sync, backing_mode,
                            has_speed, speed,
                            has_speed, speed,
                            has_granularity, granularity,
                            has_granularity, granularity,
                            has_buf_size, buf_size,
                            has_buf_size, buf_size,
@@ -4154,22 +4166,18 @@ void qmp_x_blockdev_change(const char *parent, bool has_child,
 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
 {
     BlockJobInfoList *head = NULL, **p_next = &head;
     BlockJobInfoList *head = NULL, **p_next = &head;
-    BlockDriverState *bs;
-    BdrvNextIterator it;
+    BlockJob *job;
 
 
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
+    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+        AioContext *aio_context = blk_get_aio_context(job->blk);
 
 
         aio_context_acquire(aio_context);
         aio_context_acquire(aio_context);
-
-        if (bs->job) {
-            BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
-            elem->value = block_job_query(bs->job);
-            *p_next = elem;
-            p_next = &elem->next;
-        }
-
+        elem->value = block_job_query(job);
         aio_context_release(aio_context);
         aio_context_release(aio_context);
+
+        *p_next = elem;
+        p_next = &elem->next;
     }
     }
 
 
     return head;
     return head;

+ 4 - 2
blockjob.c

@@ -361,10 +361,12 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
     }
     }
 
 
     job->busy = false;
     job->busy = false;
+    if (!block_job_is_paused(job)) {
+        co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
+    }
+    /* The job can be paused while sleeping, so check this again */
     if (block_job_is_paused(job)) {
     if (block_job_is_paused(job)) {
         qemu_coroutine_yield();
         qemu_coroutine_yield();
-    } else {
-        co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
     }
     }
     job->busy = true;
     job->busy = true;
 }
 }

+ 5 - 0
hmp.c

@@ -1955,7 +1955,12 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
 
 
     blk = blk_by_name(device);
     blk = blk_by_name(device);
     if (blk) {
     if (blk) {
+        AioContext *aio_context = blk_get_aio_context(blk);
+        aio_context_acquire(aio_context);
+
         qemuio_command(blk, command);
         qemuio_command(blk, command);
+
+        aio_context_release(aio_context);
     } else {
     } else {
         error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND,
         error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND,
                   "Device '%s' not found", device);
                   "Device '%s' not found", device);

+ 1 - 1
hw/block/m25p80.c

@@ -900,7 +900,7 @@ static int m25p80_init(SSISlave *ss)
         s->storage = blk_blockalign(s->blk, s->size);
         s->storage = blk_blockalign(s->blk, s->size);
 
 
         /* FIXME: Move to late init */
         /* FIXME: Move to late init */
-        if (blk_pread(s->blk, 0, s->storage, s->size)) {
+        if (blk_pread(s->blk, 0, s->storage, s->size) != s->size) {
             fprintf(stderr, "Failed to initialize SPI flash!\n");
             fprintf(stderr, "Failed to initialize SPI flash!\n");
             return 1;
             return 1;
         }
         }

+ 12 - 3
include/block/block.h

@@ -65,6 +65,9 @@ typedef enum {
     BDRV_REQ_MAY_UNMAP          = 0x4,
     BDRV_REQ_MAY_UNMAP          = 0x4,
     BDRV_REQ_NO_SERIALISING     = 0x8,
     BDRV_REQ_NO_SERIALISING     = 0x8,
     BDRV_REQ_FUA                = 0x10,
     BDRV_REQ_FUA                = 0x10,
+
+    /* Mask of valid flags */
+    BDRV_REQ_MASK               = 0x1f,
 } BdrvRequestFlags;
 } BdrvRequestFlags;
 
 
 typedef struct BlockSizes {
 typedef struct BlockSizes {
@@ -232,6 +235,7 @@ int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
                void *buf, int count);
                void *buf, int count);
+int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                 const void *buf, int count);
                 const void *buf, int count);
 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
@@ -401,10 +405,14 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors);
                           const uint8_t *buf, int nb_sectors);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+                                    int64_t sector_num, int nb_sectors,
+                                    int64_t *cluster_sector_num,
+                                    int *cluster_nb_sectors);
 void bdrv_round_to_clusters(BlockDriverState *bs,
 void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t sector_num, int nb_sectors,
-                            int64_t *cluster_sector_num,
-                            int *cluster_nb_sectors);
+                            int64_t offset, unsigned int bytes,
+                            int64_t *cluster_offset,
+                            unsigned int *cluster_bytes);
 
 
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
 void bdrv_get_backing_filename(BlockDriverState *bs,
@@ -423,6 +431,7 @@ void path_combine(char *dest, int dest_size,
                   const char *base_path,
                   const char *base_path,
                   const char *filename);
                   const char *filename);
 
 
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                       int64_t pos, int size);
                       int64_t pos, int size);

+ 23 - 8
include/block/block_int.h

@@ -224,10 +224,12 @@ struct BlockDriver {
     int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
     int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
     ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
     ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
 
 
-    int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
-                             int64_t pos);
-    int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
-                             int64_t pos, int size);
+    int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
+                                          QEMUIOVector *qiov,
+                                          int64_t pos);
+    int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
+                                          QEMUIOVector *qiov,
+                                          int64_t pos);
 
 
     int (*bdrv_change_backing_file)(BlockDriverState *bs,
     int (*bdrv_change_backing_file)(BlockDriverState *bs,
         const char *backing_file, const char *backing_fmt);
         const char *backing_file, const char *backing_fmt);
@@ -449,9 +451,6 @@ struct BlockDriverState {
     /* I/O Limits */
     /* I/O Limits */
     BlockLimits bl;
     BlockLimits bl;
 
 
-    /* Whether produces zeros when read beyond eof */
-    bool zero_beyond_eof;
-
     /* Alignment requirement for offset/length of I/O requests */
     /* Alignment requirement for offset/length of I/O requests */
     unsigned int request_alignment;
     unsigned int request_alignment;
     /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
     /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
@@ -510,6 +509,20 @@ struct BlockBackendRootState {
     BlockdevDetectZeroesOptions detect_zeroes;
     BlockdevDetectZeroesOptions detect_zeroes;
 };
 };
 
 
+typedef enum BlockMirrorBackingMode {
+    /* Reuse the existing backing chain from the source for the target.
+     * - sync=full: Set backing BDS to NULL.
+     * - sync=top:  Use source's backing BDS.
+     * - sync=none: Use source as the backing BDS. */
+    MIRROR_SOURCE_BACKING_CHAIN,
+
+    /* Open the target's backing chain completely anew */
+    MIRROR_OPEN_BACKING_CHAIN,
+
+    /* Do not change the target's backing BDS after job completion */
+    MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
 static inline BlockDriverState *backing_bs(BlockDriverState *bs)
 static inline BlockDriverState *backing_bs(BlockDriverState *bs)
 {
 {
     return bs->backing ? bs->backing->bs : NULL;
     return bs->backing ? bs->backing->bs : NULL;
@@ -672,6 +685,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  * @granularity: The chosen granularity for the dirty bitmap.
  * @granularity: The chosen granularity for the dirty bitmap.
  * @buf_size: The amount of data that can be in flight at one time.
  * @buf_size: The amount of data that can be in flight at one time.
  * @mode: Whether to collapse all images in the chain to the target.
  * @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @on_target_error: The action to take upon error writing to the target.
  * @unmap: Whether to unmap target where source sectors only contain zeroes.
  * @unmap: Whether to unmap target where source sectors only contain zeroes.
@@ -687,7 +701,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   const char *replaces,
                   const char *replaces,
                   int64_t speed, uint32_t granularity, int64_t buf_size,
                   int64_t speed, uint32_t granularity, int64_t buf_size,
-                  MirrorSyncMode mode, BlockdevOnError on_source_error,
+                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+                  BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockdevOnError on_target_error,
                   bool unmap,
                   bool unmap,
                   BlockCompletionFunc *cb,
                   BlockCompletionFunc *cb,

+ 1 - 1
qemu-img.c

@@ -3570,7 +3570,7 @@ static int img_bench(int argc, char **argv)
     BlockBackend *blk = NULL;
     BlockBackend *blk = NULL;
     BenchData data = {};
     BenchData data = {};
     int flags = 0;
     int flags = 0;
-    bool writethrough;
+    bool writethrough = false;
     struct timeval t1, t2;
     struct timeval t1, t2;
     int i;
     int i;
 
 

+ 2 - 10
tests/qemu-iotests/087.out

@@ -42,22 +42,14 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on
 Testing: -S
 Testing: -S
 QMP_VERSION
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-IMGFMT built-in AES encryption is deprecated
-Support for it will be removed in a future release.
-You can use 'qemu-img convert' to switch to an
-unencrypted IMGFMT image, or a LUKS raw image.
-{"error": {"class": "GenericError", "desc": "blockdev-add doesn't support encrypted devices"}}
+{"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}}
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 
 
 Testing:
 Testing:
 QMP_VERSION
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-IMGFMT built-in AES encryption is deprecated
-Support for it will be removed in a future release.
-You can use 'qemu-img convert' to switch to an
-unencrypted IMGFMT image, or a LUKS raw image.
-{"error": {"class": "GenericError", "desc": "Guest must be stopped for opening of encrypted image"}}
+{"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}}
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 
 

+ 2 - 0
tests/qemu-iotests/095

@@ -74,6 +74,8 @@ _send_qemu_cmd $h "{ 'execute': 'block-commit',
                                  'arguments': { 'device': 'test',
                                  'arguments': { 'device': 'test',
                                  'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED"
                                  'top': '"${TEST_IMG}.snp1"' } }" "BLOCK_JOB_COMPLETED"
 
 
+_cleanup_qemu
+
 echo
 echo
 echo "=== Base image info after commit and resize ==="
 echo "=== Base image info after commit and resize ==="
 TEST_IMG="${TEST_IMG}.base" _img_info | _filter_img_info
 TEST_IMG="${TEST_IMG}.base" _img_info | _filter_img_info

+ 261 - 0
tests/qemu-iotests/155

@@ -0,0 +1,261 @@
+#!/usr/bin/env python
+#
+# Test whether the backing BDSs are correct after completion of a
+# mirror block job; in "existing" modes (drive-mirror with
+# mode=existing and blockdev-mirror) the backing chain should not be
+# overridden.
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+back0_img = os.path.join(iotests.test_dir, 'back0.' + iotests.imgfmt)
+back1_img = os.path.join(iotests.test_dir, 'back1.' + iotests.imgfmt)
+back2_img = os.path.join(iotests.test_dir, 'back2.' + iotests.imgfmt)
+source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
+target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
+
+
+# Class variables for controlling its behavior:
+#
+# existing: If True, explicitly create the target image and blockdev-add it
+# target_backing: If existing is True: Use this filename as the backing file
+#                 of the target image
+#                 (None: no backing file)
+# target_blockdev_backing: If existing is True: Pass this dict as "backing"
+#                          for the blockdev-add command
+#                          (None: do not pass "backing")
+# target_real_backing: If existing is True: The real filename of the backing
+#                      image during runtime, only makes sense if
+#                      target_blockdev_backing is not None
+#                      (None: same as target_backing)
+
+class BaseClass(iotests.QMPTestCase):
+    target_blockdev_backing = None
+    target_real_backing = None
+
+    def setUp(self):
+        qemu_img('create', '-f', iotests.imgfmt, back0_img, '1M')
+        qemu_img('create', '-f', iotests.imgfmt, '-b', back0_img, back1_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-b', back1_img, back2_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-b', back2_img, source_img)
+
+        self.vm = iotests.VM()
+        self.vm.add_drive(None, '', 'none')
+        self.vm.launch()
+
+        # Add the BDS via blockdev-add so it stays around after the mirror block
+        # job has been completed
+        result = self.vm.qmp('blockdev-add',
+                             options={'node-name': 'source',
+                                      'driver': iotests.imgfmt,
+                                      'file': {'driver': 'file',
+                                               'filename': source_img}})
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('x-blockdev-insert-medium',
+                             device='drive0', node_name='source')
+        self.assert_qmp(result, 'return', {})
+
+        self.assertIntactSourceBackingChain()
+
+        if self.existing:
+            if self.target_backing:
+                qemu_img('create', '-f', iotests.imgfmt,
+                         '-b', self.target_backing, target_img, '1M')
+            else:
+                qemu_img('create', '-f', iotests.imgfmt, target_img, '1M')
+
+            if self.cmd == 'blockdev-mirror':
+                options = { 'node-name': 'target',
+                            'driver': iotests.imgfmt,
+                            'file': { 'driver': 'file',
+                                      'filename': target_img } }
+                if self.target_blockdev_backing:
+                    options['backing'] = self.target_blockdev_backing
+
+                result = self.vm.qmp('blockdev-add', options=options)
+                self.assert_qmp(result, 'return', {})
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(source_img)
+        os.remove(back2_img)
+        os.remove(back1_img)
+        os.remove(back0_img)
+        try:
+            os.remove(target_img)
+        except OSError:
+            pass
+
+    def findBlockNode(self, node_name, id=None):
+        if id:
+            result = self.vm.qmp('query-block')
+            for device in result['return']:
+                if device['device'] == id:
+                    if node_name:
+                        self.assert_qmp(device, 'inserted/node-name', node_name)
+                    return device['inserted']
+        else:
+            result = self.vm.qmp('query-named-block-nodes')
+            for node in result['return']:
+                if node['node-name'] == node_name:
+                    return node
+
+        self.fail('Cannot find node %s/%s' % (id, node_name))
+
+    def assertIntactSourceBackingChain(self):
+        node = self.findBlockNode('source')
+
+        self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename',
+                        source_img)
+        self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename',
+                        back2_img)
+        self.assert_qmp(node, 'image' + '/backing-image' * 2 + '/filename',
+                        back1_img)
+        self.assert_qmp(node, 'image' + '/backing-image' * 3 + '/filename',
+                        back0_img)
+        self.assert_qmp_absent(node, 'image' + '/backing-image' * 4)
+
+    def assertCorrectBackingImage(self, node, default_image):
+        if self.existing:
+            if self.target_real_backing:
+                image = self.target_real_backing
+            else:
+                image = self.target_backing
+        else:
+            image = default_image
+
+        if image:
+            self.assert_qmp(node, 'image/backing-image/filename', image)
+        else:
+            self.assert_qmp_absent(node, 'image/backing-image')
+
+
+# Class variables for controlling its behavior:
+#
+# cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror
+
+class MirrorBaseClass(BaseClass):
+    def runMirror(self, sync):
+        if self.cmd == 'blockdev-mirror':
+            result = self.vm.qmp(self.cmd, device='drive0', sync=sync,
+                                 target='target')
+        else:
+            if self.existing:
+                mode = 'existing'
+            else:
+                mode = 'absolute-paths'
+            result = self.vm.qmp(self.cmd, device='drive0', sync=sync,
+                                 target=target_img, format=iotests.imgfmt,
+                                 mode=mode, node_name='target')
+
+        self.assert_qmp(result, 'return', {})
+
+        self.vm.event_wait('BLOCK_JOB_READY')
+
+        result = self.vm.qmp('block-job-complete', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        self.vm.event_wait('BLOCK_JOB_COMPLETED')
+
+    def testFull(self):
+        self.runMirror('full')
+
+        node = self.findBlockNode('target', 'drive0')
+        self.assertCorrectBackingImage(node, None)
+        self.assertIntactSourceBackingChain()
+
+    def testTop(self):
+        self.runMirror('top')
+
+        node = self.findBlockNode('target', 'drive0')
+        self.assertCorrectBackingImage(node, back2_img)
+        self.assertIntactSourceBackingChain()
+
+    def testNone(self):
+        self.runMirror('none')
+
+        node = self.findBlockNode('target', 'drive0')
+        self.assertCorrectBackingImage(node, source_img)
+        self.assertIntactSourceBackingChain()
+
+
+class TestDriveMirrorAbsolutePaths(MirrorBaseClass):
+    cmd = 'drive-mirror'
+    existing = False
+
+class TestDriveMirrorExistingNoBacking(MirrorBaseClass):
+    cmd = 'drive-mirror'
+    existing = True
+    target_backing = None
+
+class TestDriveMirrorExistingBacking(MirrorBaseClass):
+    cmd = 'drive-mirror'
+    existing = True
+    target_backing = 'null-co://'
+
+class TestBlockdevMirrorNoBacking(MirrorBaseClass):
+    cmd = 'blockdev-mirror'
+    existing = True
+    target_backing = None
+
+class TestBlockdevMirrorBacking(MirrorBaseClass):
+    cmd = 'blockdev-mirror'
+    existing = True
+    target_backing = 'null-co://'
+
+class TestBlockdevMirrorForcedBacking(MirrorBaseClass):
+    cmd = 'blockdev-mirror'
+    existing = True
+    target_backing = None
+    target_blockdev_backing = { 'driver': 'null-co' }
+    target_real_backing = 'null-co://'
+
+
+class TestCommit(BaseClass):
+    existing = False
+
+    def testCommit(self):
+        result = self.vm.qmp('block-commit', device='drive0', base=back1_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.vm.event_wait('BLOCK_JOB_READY')
+
+        result = self.vm.qmp('block-job-complete', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        self.vm.event_wait('BLOCK_JOB_COMPLETED')
+
+        node = self.findBlockNode(None, 'drive0')
+        self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename',
+                        back1_img)
+        self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename',
+                        back0_img)
+        self.assert_qmp_absent(node, 'image' + '/backing-image' * 2 +
+                               '/filename')
+
+        self.assertIntactSourceBackingChain()
+
+
+BaseClass = None
+MirrorBaseClass = None
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'])

+ 5 - 0
tests/qemu-iotests/155.out

@@ -0,0 +1,5 @@
+...................
+----------------------------------------------------------------------
+Ran 19 tests
+
+OK

+ 174 - 0
tests/qemu-iotests/156

@@ -0,0 +1,174 @@
+#!/bin/bash
+#
+# Tests oVirt-like storage migration:
+#  - Create snapshot
+#  - Create target image with (not yet existing) target backing chain
+#    (i.e. just write the name of a soon-to-be-copied-over backing file into it)
+#  - drive-mirror the snapshot to the target with mode=existing and sync=top
+#  - In the meantime, copy the original source files to the destination via
+#    conventional means (i.e. outside of qemu)
+#  - Complete the drive-mirror job
+#  - Delete all source images
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1	# failure is the default!
+
+_cleanup()
+{
+    rm -f "$TEST_IMG{,.target}{,.backing,.overlay}"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2 qed
+_supported_proto generic
+_supported_os Linux
+
+# Create source disk
+TEST_IMG="$TEST_IMG.backing" _make_test_img 1M
+_make_test_img -b "$TEST_IMG.backing" 1M
+
+$QEMU_IO -c 'write -P 1 0 256k' "$TEST_IMG.backing" | _filter_qemu_io
+$QEMU_IO -c 'write -P 2 64k 192k' "$TEST_IMG" | _filter_qemu_io
+
+_launch_qemu -drive if=none,id=source,file="$TEST_IMG"
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'qmp_capabilities' }" \
+    'return'
+
+# Create snapshot
+TEST_IMG="$TEST_IMG.overlay" _make_test_img -b "$TEST_IMG" 1M
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'blockdev-snapshot-sync',
+       'arguments': { 'device': 'source',
+                      'snapshot-file': '$TEST_IMG.overlay',
+                      'format': '$IMGFMT',
+                      'mode': 'existing' } }" \
+    'return'
+
+# Write something to the snapshot
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io source \"write -P 3 128k 128k\"' } }" \
+    'return'
+
+# Create target image
+TEST_IMG="$TEST_IMG.target.overlay" _make_test_img -b "$TEST_IMG.target" 1M
+
+# Mirror snapshot
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'drive-mirror',
+       'arguments': { 'device': 'source',
+                      'target': '$TEST_IMG.target.overlay',
+                      'mode': 'existing',
+                      'sync': 'top' } }" \
+    'return'
+
+# Wait for convergence
+_send_qemu_cmd $QEMU_HANDLE \
+    '' \
+    'BLOCK_JOB_READY'
+
+# Write some more
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io source \"write -P 4 192k 64k\"' } }" \
+    'return'
+
+# Copy source backing chain to the target before completing the job
+cp "$TEST_IMG.backing" "$TEST_IMG.target.backing"
+cp "$TEST_IMG" "$TEST_IMG.target"
+$QEMU_IMG rebase -u -b "$TEST_IMG.target.backing" "$TEST_IMG.target"
+
+# Complete block job
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'block-job-complete',
+       'arguments': { 'device': 'source' } }" \
+    ''
+
+_send_qemu_cmd $QEMU_HANDLE \
+    '' \
+    'BLOCK_JOB_COMPLETED'
+
+# Remove the source images
+rm -f "$TEST_IMG{,.backing,.overlay}"
+
+echo
+
+# Check online disk contents
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io source \"read -P 1 0k 64k\"' } }" \
+    'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io source \"read -P 2 64k 64k\"' } }" \
+    'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io source \"read -P 3 128k 64k\"' } }" \
+    'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'human-monitor-command',
+       'arguments': { 'command-line':
+                      'qemu-io source \"read -P 4 192k 64k\"' } }" \
+    'return'
+
+echo
+
+_send_qemu_cmd $QEMU_HANDLE \
+    "{ 'execute': 'quit' }" \
+    'return'
+
+wait=1 _cleanup_qemu
+
+echo
+
+# Check offline disk contents
+$QEMU_IO -c 'read -P 1 0k 64k' \
+         -c 'read -P 2 64k 64k' \
+         -c 'read -P 3 128k 64k' \
+         -c 'read -P 4 192k 64k' \
+         "$TEST_IMG.target.overlay" | _filter_qemu_io
+
+echo
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0

+ 48 - 0
tests/qemu-iotests/156.out

@@ -0,0 +1,48 @@
+QA output created by 156
+Formatting 'TEST_DIR/t.IMGFMT.backing', fmt=IMGFMT size=1048576
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.backing
+wrote 262144/262144 bytes at offset 0
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 196608/196608 bytes at offset 65536
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": {}}
+Formatting 'TEST_DIR/t.IMGFMT.overlay', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT
+{"return": {}}
+wrote 131072/131072 bytes at offset 131072
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+Formatting 'TEST_DIR/t.IMGFMT.target.overlay', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.target
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "source", "len": 131072, "offset": 131072, "speed": 0, "type": "mirror"}}
+wrote 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "source", "len": 196608, "offset": 196608, "speed": 0, "type": "mirror"}}
+
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
+
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+*** done

+ 2 - 1
tests/qemu-iotests/README

@@ -17,4 +17,5 @@ additional options to test further image formats or I/O methods.
 * Feedback and patches
 * Feedback and patches
 
 
 Please send improvements to the test suite, general feedback or just
 Please send improvements to the test suite, general feedback or just
-reports of failing tests cases to qemu-devel@savannah.nongnu.org.
+reports of failing tests cases to qemu-devel@nongnu.org with a CC:
+to qemu-block@nongnu.org.

+ 2 - 0
tests/qemu-iotests/group

@@ -154,3 +154,5 @@
 150 rw auto quick
 150 rw auto quick
 152 rw auto quick
 152 rw auto quick
 154 rw auto backing quick
 154 rw auto backing quick
+155 rw auto
+156 rw auto quick

+ 4 - 4
trace-events

@@ -73,7 +73,7 @@ bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
 bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
-bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
 
 
 # block/stream.c
 # block/stream.c
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
@@ -606,16 +606,16 @@ qemu_system_shutdown_request(void) ""
 qemu_system_powerdown_request(void) ""
 qemu_system_powerdown_request(void) ""
 
 
 # block/qcow2.c
 # block/qcow2.c
-qcow2_writev_start_req(void *co, int64_t sector, int nb_sectors) "co %p sector %" PRIx64 " nb_sectors %d"
+qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
 qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
 qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
 qcow2_writev_start_part(void *co) "co %p"
 qcow2_writev_start_part(void *co) "co %p"
-qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
+qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
 qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
 qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
 
 
 # block/qcow2-cluster.c
 # block/qcow2-cluster.c
-qcow2_alloc_clusters_offset(void *co, uint64_t offset, int num) "co %p offset %" PRIx64 " num %d"
+qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
 qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
 qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
 qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
 qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
 qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
 qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"

+ 3 - 0
util/hbitmap.c

@@ -269,6 +269,7 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
     start >>= hb->granularity;
     start >>= hb->granularity;
     last >>= hb->granularity;
     last >>= hb->granularity;
     count = last - start + 1;
     count = last - start + 1;
+    assert(last < hb->size);
 
 
     hb->count += count - hb_count_between(hb, start, last);
     hb->count += count - hb_count_between(hb, start, last);
     hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
     hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
@@ -348,6 +349,7 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
 
 
     start >>= hb->granularity;
     start >>= hb->granularity;
     last >>= hb->granularity;
     last >>= hb->granularity;
+    assert(last < hb->size);
 
 
     hb->count -= hb_count_between(hb, start, last);
     hb->count -= hb_count_between(hb, start, last);
     hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
     hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
@@ -371,6 +373,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item)
     /* Compute position and bit in the last layer.  */
     /* Compute position and bit in the last layer.  */
     uint64_t pos = item >> hb->granularity;
     uint64_t pos = item >> hb->granularity;
     unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
     unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+    assert(pos < hb->size);
 
 
     return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
     return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
 }
 }