Browse Source

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer fixes for 2.9.0-rc4

# gpg: Signature made Fri 07 Apr 2017 13:44:17 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  mirror: Fix aio context of mirror_top_bs
  block: Assert attached child node has right aio context
  block: Fix unpaired aio_disable_external in external snapshot
  block: Don't check permissions for copy on read
  qemu-img: img_create does not support image-opts, fix docs
  iotests: Add mirror tests for orphaned source
  block/mirror: Fix use-after-free
  commit: Set commit_top_bs->total_sectors
  commit: Set commit_top_bs->aio_context
  block: Ignore guest dev permissions during incoming migration

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 8 năm trước cách đây
mục cha
commit
5daf9b3025

+ 4 - 0
block.c

@@ -1752,6 +1752,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 {
     BlockDriverState *old_bs = child->bs;
 
+    if (old_bs && new_bs) {
+        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
+    }
     if (old_bs) {
         if (old_bs->quiesce_counter && child->role->drained_end) {
             child->role->drained_end(child);
@@ -1852,6 +1855,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
     bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
 
     assert(parent_bs->drv);
+    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
     parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
                                     perm, shared_perm, &perm, &shared_perm);
 

+ 39 - 1
block/block-backend.c

@@ -61,6 +61,7 @@ struct BlockBackend {
 
     uint64_t perm;
     uint64_t shared_perm;
+    bool disable_perm;
 
     bool allow_write_beyond_eof;
 
@@ -578,7 +579,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
 {
     int ret;
 
-    if (blk->root) {
+    if (blk->root && !blk->disable_perm) {
         ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
         if (ret < 0) {
             return ret;
@@ -597,15 +598,52 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
     *shared_perm = blk->shared_perm;
 }
 
+/*
+ * Notifies the user of all BlockBackends that migration has completed. qdev
+ * devices can tighten their permissions in response (specifically revoke
+ * shared write permissions that we needed for storage migration).
+ *
+ * If an error is returned, the VM cannot be allowed to be resumed.
+ */
+void blk_resume_after_migration(Error **errp)
+{
+    BlockBackend *blk;
+    Error *local_err = NULL;
+
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        if (!blk->disable_perm) {
+            continue;
+        }
+
+        blk->disable_perm = false;
+
+        blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            blk->disable_perm = true;
+            return;
+        }
+    }
+}
+
 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
 {
     if (blk->dev) {
         return -EBUSY;
     }
+
+    /* While migration is still incoming, we don't need to apply the
+     * permissions of guest device BlockBackends. We might still have a block
+     * job or NBD server writing to the image for storage migration. */
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        blk->disable_perm = true;
+    }
+
     blk_ref(blk);
     blk->dev = dev;
     blk->legacy_dev = false;
     blk_iostatus_reset(blk);
+
     return 0;
 }
 

+ 3 - 0
block/commit.c

@@ -335,6 +335,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     if (commit_top_bs == NULL) {
         goto fail;
     }
+    commit_top_bs->total_sectors = top->total_sectors;
+    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));
 
     bdrv_set_backing_hd(commit_top_bs, top, &local_err);
     if (local_err) {
@@ -482,6 +484,7 @@ int bdrv_commit(BlockDriverState *bs)
         error_report_err(local_err);
         goto ro_cleanup;
     }
+    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
 
     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);

+ 8 - 1
block/io.c

@@ -945,7 +945,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     size_t skip_bytes;
     int ret;
 
-    assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+    /* FIXME We cannot require callers to have write permissions when all they
+     * are doing is a read request. If we did things right, write permissions
+     * would be obtained anyway, but internally by the copy-on-read code. As
+     * long as it is implemented here rather than in a separat filter driver,
+     * the copy-on-read code doesn't have its own BdrvChild, however, for which
+     * it could request permissions. Therefore we have to bypass the permission
+     * system for the moment. */
+    // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
 
     /* Cover entire cluster so no additional backing file I/O is required when
      * allocating cluster in the image file.

+ 11 - 2
block/mirror.c

@@ -1148,9 +1148,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
         return;
     }
     mirror_top_bs->total_sectors = bs->total_sectors;
+    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
 
     /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
-     * it alive until block_job_create() even if bs has no parent. */
+     * it alive until block_job_create() succeeds even if bs has no parent. */
     bdrv_ref(mirror_top_bs);
     bdrv_drained_begin(bs);
     bdrv_append(mirror_top_bs, bs, &local_err);
@@ -1168,10 +1169,12 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                          BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
                          creation_flags, cb, opaque, errp);
-    bdrv_unref(mirror_top_bs);
     if (!s) {
         goto fail;
     }
+    /* The block job now has a reference to this node */
+    bdrv_unref(mirror_top_bs);
+
     s->source = bs;
     s->mirror_top_bs = mirror_top_bs;
 
@@ -1242,6 +1245,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
 
 fail:
     if (s) {
+        /* Make sure this BDS does not go away until we have completed the graph
+         * changes below */
+        bdrv_ref(mirror_top_bs);
+
         g_free(s->replaces);
         blk_unref(s->target);
         block_job_unref(&s->common);
@@ -1250,6 +1257,8 @@ fail:
     bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
                             &error_abort);
     bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+
+    bdrv_unref(mirror_top_bs);
 }
 
 void mirror_start(const char *job_id, BlockDriverState *bs,

+ 2 - 2
blockdev.c

@@ -1772,6 +1772,8 @@ static void external_snapshot_prepare(BlkActionState *common,
         return;
     }
 
+    bdrv_set_aio_context(state->new_bs, state->aio_context);
+
     /* This removes our old bs and adds the new bs. This is an operation that
      * can fail, so we need to do it in .prepare; undoing it for abort is
      * always possible. */
@@ -1789,8 +1791,6 @@ static void external_snapshot_commit(BlkActionState *common)
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
 
-    bdrv_set_aio_context(state->new_bs, state->aio_context);
-
     /* We don't need (or want) to use the transactional
      * bdrv_reopen_multiple() across all the entries at once, because we
      * don't want to abort all of them if one of them fails the reopen */

+ 2 - 0
include/block/block.h

@@ -366,6 +366,8 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
 void bdrv_invalidate_cache_all(Error **errp);
 int bdrv_inactivate_all(void);
 
+void blk_resume_after_migration(Error **errp);
+
 /* Ensure contents are flushed to disk.  */
 int bdrv_flush(BlockDriverState *bs);
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs);

+ 8 - 0
migration/migration.c

@@ -349,6 +349,14 @@ static void process_incoming_migration_bh(void *opaque)
         exit(EXIT_FAILURE);
     }
 
+    /* If we get an error here, just don't restart the VM yet. */
+    blk_resume_after_migration(&local_err);
+    if (local_err) {
+        error_free(local_err);
+        local_err = NULL;
+        autostart = false;
+    }
+
     /*
      * This must happen after all error conditions are dealt with and
      * we're sure the VM is going to be running on this host.

+ 2 - 2
qemu-img-cmds.hx

@@ -22,9 +22,9 @@ STEXI
 ETEXI
 
 DEF("create", img_create,
-    "create [-q] [--object objectdef] [--image-opts] [-f fmt] [-o options] filename [size]")
+    "create [-q] [--object objectdef] [-f fmt] [-o options] filename [size]")
 STEXI
-@item create [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
+@item create [--object @var{objectdef}] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
 ETEXI
 
 DEF("commit", img_commit,

+ 6 - 0
qmp.c

@@ -207,6 +207,12 @@ void qmp_cont(Error **errp)
         }
     }
 
+    blk_resume_after_migration(&local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         autostart = 1;
     } else {

+ 46 - 0
tests/qemu-iotests/041

@@ -966,5 +966,51 @@ class TestRepairQuorum(iotests.QMPTestCase):
         #       to check that this file is really driven by quorum
         self.vm.shutdown()
 
+# Test mirroring with a source that does not have any parents (not even a
+# BlockBackend)
+class TestOrphanedSource(iotests.QMPTestCase):
+    def setUp(self):
+        blk0 = { 'node-name': 'src',
+                 'driver': 'null-co' }
+
+        blk1 = { 'node-name': 'dest',
+                 'driver': 'null-co' }
+
+        blk2 = { 'node-name': 'dest-ro',
+                 'driver': 'null-co',
+                 'read-only': 'on' }
+
+        self.vm = iotests.VM()
+        self.vm.add_blockdev(self.qmp_to_opts(blk0))
+        self.vm.add_blockdev(self.qmp_to_opts(blk1))
+        self.vm.add_blockdev(self.qmp_to_opts(blk2))
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+
+    def test_no_job_id(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-mirror', device='src', sync='full',
+                             target='dest')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_success(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-mirror', job_id='job', device='src',
+                             sync='full', target='dest')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait('job')
+
+    def test_failing_permissions(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-mirror', device='src', sync='full',
+                             target='dest-ro')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'])

+ 2 - 2
tests/qemu-iotests/041.out

@@ -1,5 +1,5 @@
-............................................................................
+...............................................................................
 ----------------------------------------------------------------------
-Ran 76 tests
+Ran 79 tests
 
 OK

+ 15 - 0
tests/qemu-iotests/iotests.py

@@ -177,6 +177,14 @@ def add_drive(self, path, opts='', interface='virtio', format=imgfmt):
         self._num_drives += 1
         return self
 
+    def add_blockdev(self, opts):
+        self._args.append('-blockdev')
+        if isinstance(opts, str):
+            self._args.append(opts)
+        else:
+            self._args.append(','.join(opts))
+        return self
+
     def pause_drive(self, drive, event=None):
         '''Pause drive r/w operations'''
         if not event:
@@ -235,6 +243,13 @@ def flatten_qmp_object(self, obj, output=None, basestr=''):
             output[basestr[:-1]] = obj # Strip trailing '.'
         return output
 
+    def qmp_to_opts(self, obj):
+        obj = self.flatten_qmp_object(obj)
+        output_list = list()
+        for key in obj:
+            output_list += [key + '=' + obj[key]]
+        return ','.join(output_list)
+
     def assert_qmp_absent(self, d, path):
         try:
             result = self.dictpath(d, path)