Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2021-02-12' into staging

nbd patches for 2021-02-12

- let qemu-nbd handle larger backlog of connecting clients
- fix a few NBD-related iotest failures
- add block cancellation hook for faster response to NBD failures

# gpg: Signature made Fri 12 Feb 2021 19:57:56 GMT
# gpg:                using RSA key 71C2CC22B1C4602927D2F3AAA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2021-02-12:
  iotests/264: add backup-cancel test-case
  block/backup: implement .cancel job handler
  iotests/264: add mirror-cancel test-case
  iotests.py: qemu_nbd_popen: remove pid file after use
  iotests/264: move to python unittest
  block/mirror: implement .cancel job handler
  job: add .cancel handler for the driver
  block/raw-format: implement .bdrv_cancel_in_flight handler
  block/nbd: implement .bdrv_cancel_in_flight
  block: add new BlockDriver handler: bdrv_cancel_in_flight
  io: error_prepend() in qio_channel_readv_full_all() causes segfault
  iotests/210: Fix reference output
  qemu-nbd: Permit --shared=0 for unlimited clients
  qemu-nbd: Use SOMAXCONN for socket listen() backlog

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell, 4 years ago
commit abb8b29aff
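
As a reading aid for the cancellation part of this pull (not code from the series; every type below is a simplified stand-in, not the real QEMU structure), here is a minimal self-contained C sketch of how the new pieces chain together: job_cancel_async() invokes the new JobDriver .cancel hook, the backup/mirror cancel handlers call bdrv_cancel_in_flight() on their target node, and that dispatches to the block driver's new bdrv_cancel_in_flight handler (the NBD driver uses it to stop waiting for a reconnect).

/* Reading aid only: simplified stand-in types, not the real QEMU structures. */
#include <stdio.h>

typedef struct BlockDriverState BlockDriverState;

typedef struct BlockDriver {
    void (*bdrv_cancel_in_flight)(BlockDriverState *bs);  /* new hook (modeled) */
} BlockDriver;

struct BlockDriverState {
    const BlockDriver *drv;
    const char *name;
};

/* Models the new helper in block/io.c: dispatch to the driver if it has one. */
static void bdrv_cancel_in_flight(BlockDriverState *bs)
{
    if (bs && bs->drv && bs->drv->bdrv_cancel_in_flight) {
        bs->drv->bdrv_cancel_in_flight(bs);
    }
}

typedef struct Job Job;

typedef struct JobDriver {
    void (*cancel)(Job *job);   /* new optional hook (modeled) */
} JobDriver;

struct Job {
    const JobDriver *driver;
    BlockDriverState *target_bs;   /* stand-in for the job's target node */
};

/* Models job_cancel_async(): invoke the driver hook before the usual handling. */
static void job_cancel_async(Job *job)
{
    if (job->driver->cancel) {
        job->driver->cancel(job);
    }
    /* ... existing user-pause / force handling would continue here ... */
}

/* Models backup_cancel()/mirror_cancel(): stop caring about target I/O. */
static void example_job_cancel(Job *job)
{
    bdrv_cancel_in_flight(job->target_bs);
}

/* Models nbd_cancel_in_flight(): stop waiting for a reconnect, fail fast. */
static void example_nbd_cancel_in_flight(BlockDriverState *bs)
{
    printf("%s: stop waiting for reconnect, let in-flight requests fail\n",
           bs->name);
}

int main(void)
{
    const BlockDriver nbd_like = {
        .bdrv_cancel_in_flight = example_nbd_cancel_in_flight,
    };
    BlockDriverState target = { .drv = &nbd_like, .name = "nbd-target" };
    const JobDriver driver = { .cancel = example_job_cancel };
    Job job = { .driver = &driver, .target_bs = &target };

    job_cancel_async(&job);
    return 0;
}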

+ 10 - 0
block/backup.c

@@ -35,6 +35,7 @@ typedef struct BackupBlockJob {
     BlockJob common;
     BlockDriverState *backup_top;
     BlockDriverState *source_bs;
+    BlockDriverState *target_bs;
 
     BdrvDirtyBitmap *sync_bitmap;
 
@@ -329,6 +330,13 @@ static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed)
     }
 }
 
+static void backup_cancel(Job *job)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+
+    bdrv_cancel_in_flight(s->target_bs);
+}
+
 static const BlockJobDriver backup_job_driver = {
     .job_driver = {
         .instance_size          = sizeof(BackupBlockJob),
@@ -340,6 +348,7 @@ static const BlockJobDriver backup_job_driver = {
         .abort                  = backup_abort,
         .clean                  = backup_clean,
         .pause                  = backup_pause,
+        .cancel                 = backup_cancel,
     },
     .set_speed = backup_set_speed,
 };
@@ -528,6 +537,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 
     job->backup_top = backup_top;
     job->source_bs = bs;
+    job->target_bs = target;
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
     job->sync_mode = sync_mode;

+ 11 - 0
block/io.c

@@ -3460,3 +3460,14 @@ out:
 
     return ret;
 }
+
+void bdrv_cancel_in_flight(BlockDriverState *bs)
+{
+    if (!bs || !bs->drv) {
+        return;
+    }
+
+    if (bs->drv->bdrv_cancel_in_flight) {
+        bs->drv->bdrv_cancel_in_flight(bs);
+    }
+}

+ 9 - 0
block/mirror.c

@@ -1179,6 +1179,14 @@ static bool mirror_drained_poll(BlockJob *job)
     return !!s->in_flight;
 }
 
+static void mirror_cancel(Job *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
+    BlockDriverState *target = blk_bs(s->target);
+
+    bdrv_cancel_in_flight(target);
+}
+
 static const BlockJobDriver mirror_job_driver = {
     .job_driver = {
         .instance_size          = sizeof(MirrorBlockJob),
@@ -1190,6 +1198,7 @@ static const BlockJobDriver mirror_job_driver = {
         .abort                  = mirror_abort,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
+        .cancel                 = mirror_cancel,
     },
     .drained_poll           = mirror_drained_poll,
 };

+ 15 - 0
block/nbd.c

@@ -2458,6 +2458,18 @@ static const char *const nbd_strong_runtime_opts[] = {
     NULL
 };
 
+static void nbd_cancel_in_flight(BlockDriverState *bs)
+{
+    BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+    reconnect_delay_timer_del(s);
+
+    if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
+        s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+        qemu_co_queue_restart_all(&s->free_sema);
+    }
+}
+
 static BlockDriver bdrv_nbd = {
     .format_name                = "nbd",
     .protocol_name              = "nbd",
@@ -2484,6 +2496,7 @@ static BlockDriver bdrv_nbd = {
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
+    .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
@@ -2512,6 +2525,7 @@ static BlockDriver bdrv_nbd_tcp = {
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
+    .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
 };
 
 static BlockDriver bdrv_nbd_unix = {
@@ -2540,6 +2554,7 @@ static BlockDriver bdrv_nbd_unix = {
     .bdrv_co_block_status       = nbd_client_co_block_status,
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
+    .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
 };
 
 static void bdrv_nbd_init(void)

+ 6 - 0
block/raw-format.c

@@ -575,6 +575,11 @@ static const char *const raw_strong_runtime_opts[] = {
     NULL
 };
 
+static void raw_cancel_in_flight(BlockDriverState *bs)
+{
+    bdrv_cancel_in_flight(bs->file->bs);
+}
+
 BlockDriver bdrv_raw = {
     .format_name          = "raw",
     .instance_size        = sizeof(BDRVRawState),
@@ -608,6 +613,7 @@ BlockDriver bdrv_raw = {
     .bdrv_has_zero_init   = &raw_has_zero_init,
     .strong_runtime_opts  = raw_strong_runtime_opts,
     .mutable_opts         = mutable_opts,
+    .bdrv_cancel_in_flight = raw_cancel_in_flight,
 };
 
 static void bdrv_raw_init(void)

+ 6 - 1
blockdev-nbd.c

@@ -134,7 +134,12 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
     qio_net_listener_set_name(nbd_server->listener,
                               "nbd-listener");
 
-    if (qio_net_listener_open_sync(nbd_server->listener, addr, 1, errp) < 0) {
+    /*
+     * Because this server is persistent, a backlog of SOMAXCONN is
+     * better than trying to size it to max_connections.
+     */
+    if (qio_net_listener_open_sync(nbd_server->listener, addr, SOMAXCONN,
+                                   errp) < 0) {
         goto error;
     }
 

+ 2 - 2
docs/tools/qemu-nbd.rst

@@ -136,8 +136,8 @@ driver options if ``--image-opts`` is specified.
 .. option:: -e, --shared=NUM
 
   Allow up to *NUM* clients to share the device (default
-  ``1``). Safe for readers, but for now, consistency is not
-  guaranteed between multiple writers.
+  ``1``), 0 for unlimited. Safe for readers, but for now,
+  consistency is not guaranteed between multiple writers.
 
 .. option:: -t, --persistent
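
Usage note for the ``--shared=0`` behaviour documented above (illustrative, not part of the patch): an invocation along the lines of ``qemu-nbd --persistent --read-only --shared=0 disk.img`` would export the image to an unlimited number of read-only clients; ``disk.img`` is a placeholder path.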
 

+ 3 - 0
include/block/block.h

@@ -849,4 +849,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
                                     BdrvChild *dst, int64_t dst_offset,
                                     int64_t bytes, BdrvRequestFlags read_flags,
                                     BdrvRequestFlags write_flags);
+
+void bdrv_cancel_in_flight(BlockDriverState *bs);
+
 #endif

+ 9 - 0
include/block/block_int.h

@@ -352,6 +352,15 @@ struct BlockDriver {
         bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
         int64_t *map, BlockDriverState **file);
 
+    /*
+     * This informs the driver that we are no longer interested in the result
+     * of in-flight requests, so don't waste the time if possible.
+     *
+     * One example usage is to avoid waiting for an nbd target node reconnect
+     * timeout during job-cancel.
+     */
+    void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
+
     /*
      * Invalidate any cached meta-data.
      */

+ 5 - 0
include/qemu/job.h

@@ -251,6 +251,11 @@ struct JobDriver {
      */
     void (*clean)(Job *job);
 
+    /**
+     * If the callback is not NULL, it will be invoked in job_cancel_async
+     */
+    void (*cancel)(Job *job);
+
 
     /** Called when the job is freed */
     void (*free)(Job *job);

+ 1 - 2
io/channel.c

@@ -202,8 +202,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
     int ret = qio_channel_readv_full_all_eof(ioc, iov, niov, fds, nfds, errp);
 
     if (ret == 0) {
-        error_prepend(errp,
-                      "Unexpected end-of-file before all data were read.");
+        error_setg(errp, "Unexpected end-of-file before all data were read");
         return -1;
     }
     if (ret == 1) {

+ 3 - 0
job.c

@@ -715,6 +715,9 @@ static int job_finalize_single(Job *job)
 
 static void job_cancel_async(Job *job, bool force)
 {
+    if (job->driver->cancel) {
+        job->driver->cancel(job);
+    }
     if (job->user_paused) {
         /* Do not call job_enter here, the caller will handle it.  */
         if (job->driver->user_resume) {

+ 11 - 3
qemu-nbd.c

@@ -328,7 +328,7 @@ static void *nbd_client_thread(void *arg)
 
 static int nbd_can_accept(void)
 {
-    return state == RUNNING && nb_fds < shared;
+    return state == RUNNING && (shared == 0 || nb_fds < shared);
 }
 
 static void nbd_update_server_watch(void);
@@ -707,7 +707,7 @@ int main(int argc, char **argv)
             break;
         case 'e':
             if (qemu_strtoi(optarg, NULL, 0, &shared) < 0 ||
-                shared < 1) {
+                shared < 0) {
                 error_report("Invalid shared device number '%s'", optarg);
                 exit(EXIT_FAILURE);
             }
@@ -964,8 +964,16 @@ int main(int argc, char **argv)
 
     server = qio_net_listener_new();
     if (socket_activation == 0) {
+        int backlog;
+
+        if (persistent || shared == 0) {
+            backlog = SOMAXCONN;
+        } else {
+            backlog = MIN(shared, SOMAXCONN);
+        }
         saddr = nbd_build_socket_address(sockpath, bindto, port);
-        if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) {
+        if (qio_net_listener_open_sync(server, saddr, backlog,
+                                       &local_err) < 0) {
             object_unref(OBJECT(server));
             error_report_err(local_err);
             exit(EXIT_FAILURE);
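
For illustration only, the backlog rule above can be isolated into a tiny standalone C sketch; choose_backlog is a hypothetical helper, not a function in the patch, and SOMAXCONN comes from <sys/socket.h> with a system-dependent value:

#include <assert.h>
#include <stdbool.h>
#include <sys/socket.h>   /* SOMAXCONN */

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Sketch of the listen() backlog rule from the hunk above:
 * - persistent servers and --shared=0 (unlimited clients) use SOMAXCONN,
 * - otherwise the backlog is capped at the number of allowed clients.
 */
static int choose_backlog(bool persistent, int shared)
{
    if (persistent || shared == 0) {
        return SOMAXCONN;
    }
    return MIN(shared, SOMAXCONN);
}

int main(void)
{
    assert(choose_backlog(true, 1) == SOMAXCONN);
    assert(choose_backlog(false, 0) == SOMAXCONN);
    assert(choose_backlog(false, 5) == MIN(5, SOMAXCONN));
    return 0;
}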

+ 1 - 1
tests/qemu-iotests/210.out

@@ -182,7 +182,7 @@ Job failed: The requested file size is too large
 === Resize image with invalid sizes ===
 
 {"execute": "block_resize", "arguments": {"node-name": "node1", "size": 9223372036854775296}}
-{"error": {"class": "GenericError", "desc": "Required too big image size, it must be not greater than 9223372035781033984"}}
+{"error": {"class": "GenericError", "desc": "offset(9223372036854775296) exceeds maximum(9223372035781033984)"}}
 {"execute": "block_resize", "arguments": {"node-name": "node1", "size": 9223372036854775808}}
 {"error": {"class": "GenericError", "desc": "Invalid parameter type for 'size', expected: integer"}}
 {"execute": "block_resize", "arguments": {"node-name": "node1", "size": 18446744073709551104}}

+ 91 - 49
tests/qemu-iotests/264

@@ -20,60 +20,102 @@
 #
 
 import time
+import os
 
 import iotests
-from iotests import qemu_img_create, file_path, qemu_nbd_popen, log
-
-iotests.script_initialize(
-    supported_fmts=['qcow2'],
-)
+from iotests import qemu_img_create, file_path, qemu_nbd_popen
 
 disk_a, disk_b, nbd_sock = file_path('disk_a', 'disk_b', 'nbd-sock')
 nbd_uri = 'nbd+unix:///?socket=' + nbd_sock
-size = 5 * 1024 * 1024
 wait_limit = 3.0
 wait_step = 0.2
 
-qemu_img_create('-f', iotests.imgfmt, disk_a, str(size))
-qemu_img_create('-f', iotests.imgfmt, disk_b, str(size))
-
-with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
-    vm = iotests.VM().add_drive(disk_a)
-    vm.launch()
-    vm.hmp_qemu_io('drive0', 'write 0 {}'.format(size))
-
-    vm.qmp_log('blockdev-add', filters=[iotests.filter_qmp_testfiles],
-               **{'node_name': 'backup0',
-                  'driver': 'raw',
-                  'file': {'driver': 'nbd',
-                           'server': {'type': 'unix', 'path': nbd_sock},
-                           'reconnect-delay': 10}})
-    vm.qmp_log('blockdev-backup', device='drive0', sync='full',
-               target='backup0', speed=(1 * 1024 * 1024))
-
-    # Wait for some progress
-    t = 0.0
-    while t < wait_limit:
-        jobs = vm.qmp('query-block-jobs')['return']
-        if jobs and jobs[0]['offset'] > 0:
-            break
-        time.sleep(wait_step)
-        t += wait_step
-
-    if jobs and jobs[0]['offset'] > 0:
-        log('Backup job is started')
-
-jobs = vm.qmp('query-block-jobs')['return']
-if jobs and jobs[0]['offset'] < jobs[0]['len']:
-    log('Backup job is still in progress')
-
-vm.qmp_log('block-job-set-speed', device='drive0', speed=0)
-
-# Emulate server down time for 1 second
-time.sleep(1)
-
-with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
-    e = vm.event_wait('BLOCK_JOB_COMPLETED')
-    log('Backup completed: {}'.format(e['data']['offset']))
-    vm.qmp_log('blockdev-del', node_name='backup0')
-    vm.shutdown()
+
+class TestNbdReconnect(iotests.QMPTestCase):
+    def init_vm(self, disk_size):
+        qemu_img_create('-f', iotests.imgfmt, disk_a, str(disk_size))
+        qemu_img_create('-f', iotests.imgfmt, disk_b, str(disk_size))
+        self.vm = iotests.VM().add_drive(disk_a)
+        self.vm.launch()
+        self.vm.hmp_qemu_io('drive0', 'write 0 {}'.format(disk_size))
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(disk_a)
+        os.remove(disk_b)
+
+    def start_job(self, job):
+        """Stat job with nbd target and kill the server"""
+        assert job in ('blockdev-backup', 'blockdev-mirror')
+        with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
+            result = self.vm.qmp('blockdev-add',
+                                 **{'node_name': 'backup0',
+                                    'driver': 'raw',
+                                    'file': {'driver': 'nbd',
+                                             'server': {'type': 'unix',
+                                                        'path': nbd_sock},
+                                             'reconnect-delay': 10}})
+            self.assert_qmp(result, 'return', {})
+            result = self.vm.qmp(job, device='drive0',
+                                 sync='full', target='backup0',
+                                 speed=(1 * 1024 * 1024))
+            self.assert_qmp(result, 'return', {})
+
+            # Wait for some progress
+            t = 0.0
+            while t < wait_limit:
+                jobs = self.vm.qmp('query-block-jobs')['return']
+                if jobs and jobs[0]['offset'] > 0:
+                    break
+                time.sleep(wait_step)
+                t += wait_step
+
+            self.assertTrue(jobs and jobs[0]['offset'] > 0)  # job started
+
+        jobs = self.vm.qmp('query-block-jobs')['return']
+        # Check that job is still in progress
+        self.assertTrue(jobs)
+        self.assertTrue(jobs[0]['offset'] < jobs[0]['len'])
+
+        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=0)
+        self.assert_qmp(result, 'return', {})
+
+        # Emulate server down time for 1 second
+        time.sleep(1)
+
+    def test_backup(self):
+        size = 5 * 1024 * 1024
+        self.init_vm(size)
+        self.start_job('blockdev-backup')
+
+        with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
+            e = self.vm.event_wait('BLOCK_JOB_COMPLETED')
+            self.assertEqual(e['data']['offset'], size)
+            result = self.vm.qmp('blockdev-del', node_name='backup0')
+            self.assert_qmp(result, 'return', {})
+
+    def cancel_job(self):
+        result = self.vm.qmp('block-job-cancel', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        start_t = time.time()
+        self.vm.event_wait('BLOCK_JOB_CANCELLED')
+        delta_t = time.time() - start_t
+        self.assertTrue(delta_t < 2.0)
+
+    def test_mirror_cancel(self):
+        # The mirror speed limit is not effective enough; mirror seems to
+        # issue many parallel requests anyway. MAX_IN_FLIGHT is 16 and
+        # MAX_IO_BYTES is 1M in mirror.c, so use a 20M disk.
+        self.init_vm(20 * 1024 * 1024)
+        self.start_job('blockdev-mirror')
+        self.cancel_job()
+
+    def test_backup_cancel(self):
+        self.init_vm(5 * 1024 * 1024)
+        self.start_job('blockdev-backup')
+        self.cancel_job()
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'])

+ 5 - 15
tests/qemu-iotests/264.out

@@ -1,15 +1,5 @@
-Start NBD server
-{"execute": "blockdev-add", "arguments": {"driver": "raw", "file": {"driver": "nbd", "reconnect-delay": 10, "server": {"path": "TEST_DIR/PID-nbd-sock", "type": "unix"}}, "node-name": "backup0"}}
-{"return": {}}
-{"execute": "blockdev-backup", "arguments": {"device": "drive0", "speed": 1048576, "sync": "full", "target": "backup0"}}
-{"return": {}}
-Backup job is started
-Kill NBD server
-Backup job is still in progress
-{"execute": "block-job-set-speed", "arguments": {"device": "drive0", "speed": 0}}
-{"return": {}}
-Start NBD server
-Backup completed: 5242880
-{"execute": "blockdev-del", "arguments": {"node-name": "backup0"}}
-{"return": {}}
-Kill NBD server
+...
+----------------------------------------------------------------------
+Ran 3 tests
+
+OK

+ 5 - 1
tests/qemu-iotests/iotests.py

@@ -296,7 +296,9 @@ def qemu_nbd_list_log(*args: str) -> str:
 @contextmanager
 def qemu_nbd_popen(*args):
     '''Context manager running qemu-nbd within the context'''
-    pid_file = file_path("pid")
+    pid_file = file_path("qemu_nbd_popen-nbd-pid-file")
+
+    assert not os.path.exists(pid_file)
 
     cmd = list(qemu_nbd_args)
     cmd.extend(('--persistent', '--pid-file', pid_file))
@@ -314,6 +316,8 @@ def qemu_nbd_popen(*args):
             time.sleep(0.01)
         yield
     finally:
+        if os.path.exists(pid_file):
+            os.remove(pid_file)
         log('Kill NBD server')
         p.kill()
         p.wait()