Browse Source

Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

Block layer patches

- Allow concurrent BB context changes
- virtio: Re-enable notifications after drain
- virtio-blk: Fix missing use of irqfd
- scsi: Don't ignore most usb-storage properties
- blkio: Respect memory-alignment for bounce buffer allocations
- iotests tmpdir fixes
- virtio-blk: Code cleanups

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmXEkwgRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9Y3jA//TmSBVqHljauyImYOgCt8qCXACttV0xhQ
# Q5ldUNx/JmIFMoUR7OlpVAL2MtvdwE0jjY+sDlEmWtz4IFJcCsCTUCHZZb8blreb
# +mnMkqrQ6Nb3tPR2jeIknrXqNy1ffyjZItktjWXVcl6jaHB8YabHHqszs9DIaf4n
# lcKovBKxula8ckMgvm48wCwTtS7VEPeuC5FrOqUqTtuhg+QKp5ZVoyVFHtf6GKTD
# iuXzCd4yxu4fDKAthJJj4N1bQaOmCKU7K9N/665wj9P2TyfmwlBAfNwNAlYbdX1E
# Sv7eSioQs2+oUxmfD/PUsF7wTYtDCrSAUFn1kP/XdRyXPJR3dHGiBKV9w9CaWNrU
# y8rqOhxVcuoBLRljTF32BK4HniAREjRngtpT2FnQQIyedZrXIwyTAWjs+LW12T6O
# NMiU603Nl9ZYhO1et2+qspsVpNIfEpQWpK+OCon6E+ggj1ea+pfqU30VPx4JU05I
# VLiydluIbehSkRlTHgFcTgApmx843OGW7CvWfRyen86Cexgx3DEjJUQ4/bYqaCha
# yLIi91rToSDmtlzJrg9eYiMs5Y6vz+ORvvX5im1RlbUUb7Kx/LaA4BU/uArEbBt8
# xXm/grO4hFUGqtLgd2LIjWaHSsLoW4jKeEiExFUUfvH5DG9Zl5HmzFwu+DYxX+im
# MJLLetDJAWI=
# =8tc0
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 08 Feb 2024 08:38:32 GMT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  virtio-blk: avoid using ioeventfd state in irqfd conditional
  virtio-blk: Use ioeventfd_attach in start_ioeventfd
  virtio: Re-enable notifications after drain
  virtio-scsi: Attach event vq notifier with no_poll
  blkio: Respect memory-alignment for bounce buffer allocations
  scsi: Don't ignore most usb-storage properties
  virtio-blk: do not use C99 mixed declarations
  iotests: give tempdir an identifying name
  iotests: fix leak of tmpdir in dry-run mode
  scsi: Await request purging
  block-backend: Allow concurrent context changes
  monitor: use aio_co_reschedule_self()
  virtio-blk: declare VirtIOBlock::rq with a type
  virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb()
  virtio-blk: clarify that there is at least 1 virtqueue
  virtio-blk: enforce iothread-vq-mapping validation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 1 year ago
parent
commit
e2beaf7bad

+ 3 - 0
block/blkio.c

@@ -89,6 +89,9 @@ static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
     /* Pad size to reduce frequency of resize calls */
     /* Pad size to reduce frequency of resize calls */
     bytes += 128 * 1024;
     bytes += 128 * 1024;
 
 
+    /* Align the pool size to avoid blkio_alloc_mem_region() failure */
+    bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment);
+
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         int ret;
         int ret;
 
 

+ 11 - 11
block/block-backend.c

@@ -44,7 +44,7 @@ struct BlockBackend {
     char *name;
     char *name;
     int refcnt;
     int refcnt;
     BdrvChild *root;
     BdrvChild *root;
-    AioContext *ctx;
+    AioContext *ctx; /* access with atomic operations only */
     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
     }
     }
 }
 }
 
 
+/**
+ * Return BB's current AioContext.  Note that this context may change
+ * concurrently at any time, with one exception: If the BB has a root node
+ * attached, its context will only change through bdrv_try_change_aio_context(),
+ * which creates a drained section.  Therefore, incrementing such a BB's
+ * in-flight counter will prevent its context from changing.
+ */
 AioContext *blk_get_aio_context(BlockBackend *blk)
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
 {
-    BlockDriverState *bs;
     IO_CODE();
     IO_CODE();
 
 
     if (!blk) {
     if (!blk) {
         return qemu_get_aio_context();
         return qemu_get_aio_context();
     }
     }
 
 
-    bs = blk_bs(blk);
-    if (bs) {
-        AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
-        assert(ctx == blk->ctx);
-    }
-
-    return blk->ctx;
+    return qatomic_read(&blk->ctx);
 }
 }
 
 
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
     GLOBAL_STATE_CODE();
     GLOBAL_STATE_CODE();
 
 
     if (!bs) {
     if (!bs) {
-        blk->ctx = new_context;
+        qatomic_set(&blk->ctx, new_context);
         return 0;
         return 0;
     }
     }
 
 
@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
     AioContext *new_context = s->new_ctx;
     AioContext *new_context = s->new_ctx;
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
 
 
-    blk->ctx = new_context;
+    qatomic_set(&blk->ctx, new_context);
     if (tgm->throttle_state) {
     if (tgm->throttle_state) {
         throttle_group_detach_aio_context(tgm);
         throttle_group_detach_aio_context(tgm);
         throttle_group_attach_aio_context(tgm, new_context);
         throttle_group_attach_aio_context(tgm, new_context);

+ 126 - 100
hw/block/virtio-blk.c

@@ -37,6 +37,8 @@
 #include "hw/virtio/virtio-blk-common.h"
 #include "hw/virtio/virtio-blk-common.h"
 #include "qemu/coroutine.h"
 #include "qemu/coroutine.h"
 
 
+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s);
+
 static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
 static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
                                     VirtIOBlockReq *req)
                                     VirtIOBlockReq *req)
 {
 {
@@ -64,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
     iov_discard_undo(&req->inhdr_undo);
     iov_discard_undo(&req->inhdr_undo);
     iov_discard_undo(&req->outhdr_undo);
     iov_discard_undo(&req->outhdr_undo);
     virtqueue_push(req->vq, &req->elem, req->in_len);
     virtqueue_push(req->vq, &req->elem, req->in_len);
-    if (s->ioeventfd_started && !s->ioeventfd_disabled) {
+    if (qemu_in_iothread()) {
         virtio_notify_irqfd(vdev, req->vq);
         virtio_notify_irqfd(vdev, req->vq);
     } else {
     } else {
         virtio_notify(vdev, req->vq);
         virtio_notify(vdev, req->vq);
@@ -661,6 +663,9 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret)
     int64_t zrp_size, n, j = 0;
     int64_t zrp_size, n, j = 0;
     int64_t nz = data->zone_report_data.nr_zones;
     int64_t nz = data->zone_report_data.nr_zones;
     int8_t err_status = VIRTIO_BLK_S_OK;
     int8_t err_status = VIRTIO_BLK_S_OK;
+    struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
+        .nr_zones = cpu_to_le64(nz),
+    };
 
 
     trace_virtio_blk_zone_report_complete(vdev, req, nz, ret);
     trace_virtio_blk_zone_report_complete(vdev, req, nz, ret);
     if (ret) {
     if (ret) {
@@ -668,9 +673,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret)
         goto out;
         goto out;
     }
     }
 
 
-    struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report) {
-        .nr_zones = cpu_to_le64(nz),
-    };
     zrp_size = sizeof(struct virtio_blk_zone_report)
     zrp_size = sizeof(struct virtio_blk_zone_report)
                + sizeof(struct virtio_blk_zone_descriptor) * nz;
                + sizeof(struct virtio_blk_zone_descriptor) * nz;
     n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
     n = iov_from_buf(in_iov, in_num, 0, &zrp_hdr, sizeof(zrp_hdr));
@@ -898,13 +900,14 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
 
 
     int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
     int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
     int64_t len = iov_size(out_iov, out_num);
     int64_t len = iov_size(out_iov, out_num);
+    ZoneCmdData *data;
 
 
     trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS);
     trace_virtio_blk_handle_zone_append(vdev, req, offset >> BDRV_SECTOR_BITS);
     if (!check_zoned_request(s, offset, len, true, &err_status)) {
     if (!check_zoned_request(s, offset, len, true, &err_status)) {
         goto out;
         goto out;
     }
     }
 
 
-    ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
+    data = g_malloc(sizeof(ZoneCmdData));
     data->req = req;
     data->req = req;
     data->in_iov = in_iov;
     data->in_iov = in_iov;
     data->in_num = in_num;
     data->in_num = in_num;
@@ -1191,14 +1194,15 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running,
 {
 {
     VirtIOBlock *s = opaque;
     VirtIOBlock *s = opaque;
     uint16_t num_queues = s->conf.num_queues;
     uint16_t num_queues = s->conf.num_queues;
+    g_autofree VirtIOBlockReq **vq_rq = NULL;
+    VirtIOBlockReq *rq;
 
 
     if (!running) {
     if (!running) {
         return;
         return;
     }
     }
 
 
     /* Split the device-wide s->rq request list into per-vq request lists */
     /* Split the device-wide s->rq request list into per-vq request lists */
-    g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues);
-    VirtIOBlockReq *rq;
+    vq_rq = g_new0(VirtIOBlockReq *, num_queues);
 
 
     WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
     WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
         rq = s->rq;
         rq = s->rq;
@@ -1209,6 +1213,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running,
         VirtIOBlockReq *next = rq->next;
         VirtIOBlockReq *next = rq->next;
         uint16_t idx = virtio_get_queue_index(rq->vq);
         uint16_t idx = virtio_get_queue_index(rq->vq);
 
 
+        /* Only num_queues vqs were created so vq_rq[idx] is within bounds */
+        assert(idx < num_queues);
         rq->next = vq_rq[idx];
         rq->next = vq_rq[idx];
         vq_rq[idx] = rq;
         vq_rq[idx] = rq;
         rq = next;
         rq = next;
@@ -1485,68 +1491,6 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
     return 0;
     return 0;
 }
 }
 
 
-static bool
-validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
-        uint16_t num_queues, Error **errp)
-{
-    g_autofree unsigned long *vqs = bitmap_new(num_queues);
-    g_autoptr(GHashTable) iothreads =
-        g_hash_table_new(g_str_hash, g_str_equal);
-
-    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
-        const char *name = node->value->iothread;
-        uint16List *vq;
-
-        if (!iothread_by_id(name)) {
-            error_setg(errp, "IOThread \"%s\" object does not exist", name);
-            return false;
-        }
-
-        if (!g_hash_table_add(iothreads, (gpointer)name)) {
-            error_setg(errp,
-                    "duplicate IOThread name \"%s\" in iothread-vq-mapping",
-                    name);
-            return false;
-        }
-
-        if (node != list) {
-            if (!!node->value->vqs != !!list->value->vqs) {
-                error_setg(errp, "either all items in iothread-vq-mapping "
-                                 "must have vqs or none of them must have it");
-                return false;
-            }
-        }
-
-        for (vq = node->value->vqs; vq; vq = vq->next) {
-            if (vq->value >= num_queues) {
-                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
-                        "less than num_queues %u in iothread-vq-mapping",
-                        vq->value, name, num_queues);
-                return false;
-            }
-
-            if (test_and_set_bit(vq->value, vqs)) {
-                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
-                        "because it is already assigned", vq->value, name);
-                return false;
-            }
-        }
-    }
-
-    if (list->value->vqs) {
-        for (uint16_t i = 0; i < num_queues; i++) {
-            if (!test_bit(i, vqs)) {
-                error_setg(errp,
-                        "missing vq %u IOThread assignment in iothread-vq-mapping",
-                        i);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
 static void virtio_resize_cb(void *opaque)
 static void virtio_resize_cb(void *opaque)
 {
 {
     VirtIODevice *vdev = opaque;
     VirtIODevice *vdev = opaque;
@@ -1613,15 +1557,95 @@ static const BlockDevOps virtio_block_ops = {
     .drained_end   = virtio_blk_drained_end,
     .drained_end   = virtio_blk_drained_end,
 };
 };
 
 
-/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */
-static void
-apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
-                 AioContext **vq_aio_context, uint16_t num_queues)
+static bool
+validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
+        uint16_t num_queues, Error **errp)
+{
+    g_autofree unsigned long *vqs = bitmap_new(num_queues);
+    g_autoptr(GHashTable) iothreads =
+        g_hash_table_new(g_str_hash, g_str_equal);
+
+    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
+        const char *name = node->value->iothread;
+        uint16List *vq;
+
+        if (!iothread_by_id(name)) {
+            error_setg(errp, "IOThread \"%s\" object does not exist", name);
+            return false;
+        }
+
+        if (!g_hash_table_add(iothreads, (gpointer)name)) {
+            error_setg(errp,
+                    "duplicate IOThread name \"%s\" in iothread-vq-mapping",
+                    name);
+            return false;
+        }
+
+        if (node != list) {
+            if (!!node->value->vqs != !!list->value->vqs) {
+                error_setg(errp, "either all items in iothread-vq-mapping "
+                                 "must have vqs or none of them must have it");
+                return false;
+            }
+        }
+
+        for (vq = node->value->vqs; vq; vq = vq->next) {
+            if (vq->value >= num_queues) {
+                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
+                        "less than num_queues %u in iothread-vq-mapping",
+                        vq->value, name, num_queues);
+                return false;
+            }
+
+            if (test_and_set_bit(vq->value, vqs)) {
+                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
+                        "because it is already assigned", vq->value, name);
+                return false;
+            }
+        }
+    }
+
+    if (list->value->vqs) {
+        for (uint16_t i = 0; i < num_queues; i++) {
+            if (!test_bit(i, vqs)) {
+                error_setg(errp,
+                        "missing vq %u IOThread assignment in iothread-vq-mapping",
+                        i);
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/**
+ * apply_iothread_vq_mapping:
+ * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
+ * @vq_aio_context: The array of AioContext pointers to fill in.
+ * @num_queues: The length of @vq_aio_context.
+ * @errp: If an error occurs, a pointer to the area to store the error.
+ *
+ * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
+ * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
+ *
+ * Returns: %true on success, %false on failure.
+ **/
+static bool apply_iothread_vq_mapping(
+        IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
+        AioContext **vq_aio_context,
+        uint16_t num_queues,
+        Error **errp)
 {
 {
     IOThreadVirtQueueMappingList *node;
     IOThreadVirtQueueMappingList *node;
     size_t num_iothreads = 0;
     size_t num_iothreads = 0;
     size_t cur_iothread = 0;
     size_t cur_iothread = 0;
 
 
+    if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
+                                           num_queues, errp)) {
+        return false;
+    }
+
     for (node = iothread_vq_mapping_list; node; node = node->next) {
     for (node = iothread_vq_mapping_list; node; node = node->next) {
         num_iothreads++;
         num_iothreads++;
     }
     }
@@ -1638,6 +1662,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
 
 
             /* Explicit vq:IOThread assignment */
             /* Explicit vq:IOThread assignment */
             for (vq = node->value->vqs; vq; vq = vq->next) {
             for (vq = node->value->vqs; vq; vq = vq->next) {
+                assert(vq->value < num_queues);
                 vq_aio_context[vq->value] = ctx;
                 vq_aio_context[vq->value] = ctx;
             }
             }
         } else {
         } else {
@@ -1650,6 +1675,8 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
 
 
         cur_iothread++;
         cur_iothread++;
     }
     }
+
+    return true;
 }
 }
 
 
 /* Context: BQL held */
 /* Context: BQL held */
@@ -1660,6 +1687,13 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
 
+    if (conf->iothread && conf->iothread_vq_mapping_list) {
+        error_setg(errp,
+                   "iothread and iothread-vq-mapping properties cannot be set "
+                   "at the same time");
+        return false;
+    }
+
     if (conf->iothread || conf->iothread_vq_mapping_list) {
     if (conf->iothread || conf->iothread_vq_mapping_list) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
             error_setg(errp,
             error_setg(errp,
@@ -1685,8 +1719,14 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
     s->vq_aio_context = g_new(AioContext *, conf->num_queues);
     s->vq_aio_context = g_new(AioContext *, conf->num_queues);
 
 
     if (conf->iothread_vq_mapping_list) {
     if (conf->iothread_vq_mapping_list) {
-        apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context,
-                         conf->num_queues);
+        if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
+                                       s->vq_aio_context,
+                                       conf->num_queues,
+                                       errp)) {
+            g_free(s->vq_aio_context);
+            s->vq_aio_context = NULL;
+            return false;
+        }
     } else if (conf->iothread) {
     } else if (conf->iothread) {
         AioContext *ctx = iothread_get_aio_context(conf->iothread);
         AioContext *ctx = iothread_get_aio_context(conf->iothread);
         for (unsigned i = 0; i < conf->num_queues; i++) {
         for (unsigned i = 0; i < conf->num_queues; i++) {
@@ -1790,6 +1830,7 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev)
      * Try to change the AioContext so that block jobs and other operations can
      * Try to change the AioContext so that block jobs and other operations can
      * co-locate their activity in the same AioContext. If it fails, nevermind.
      * co-locate their activity in the same AioContext. If it fails, nevermind.
      */
      */
+    assert(nvqs > 0); /* enforced during ->realize() */
     r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0],
     r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0],
                             &local_err);
                             &local_err);
     if (r < 0) {
     if (r < 0) {
@@ -1808,17 +1849,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev)
     s->ioeventfd_started = true;
     s->ioeventfd_started = true;
     smp_wmb(); /* paired with aio_notify_accept() on the read side */
     smp_wmb(); /* paired with aio_notify_accept() on the read side */
 
 
-    /* Get this show started by hooking up our callbacks */
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(vdev, i);
-        AioContext *ctx = s->vq_aio_context[i];
-
-        /* Kick right away to begin processing requests already in vring */
-        event_notifier_set(virtio_queue_get_host_notifier(vq));
-
-        if (!blk_in_drain(s->conf.conf.blk)) {
-            virtio_queue_aio_attach_host_notifier(vq, ctx);
-        }
+    /*
+     * Get this show started by hooking up our callbacks.  If drained now,
+     * virtio_blk_drained_end() will do this later.
+     * Attaching the notifier also kicks the virtqueues, processing any requests
+     * they may already have.
+     */
+    if (!blk_in_drain(s->conf.conf.blk)) {
+        virtio_blk_ioeventfd_attach(s);
     }
     }
     return 0;
     return 0;
 
 
@@ -1924,6 +1962,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOBlock *s = VIRTIO_BLK(dev);
     VirtIOBlock *s = VIRTIO_BLK(dev);
     VirtIOBlkConf *conf = &s->conf;
     VirtIOBlkConf *conf = &s->conf;
+    BlockDriverState *bs;
     Error *err = NULL;
     Error *err = NULL;
     unsigned i;
     unsigned i;
 
 
@@ -1969,7 +2008,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
         return;
         return;
     }
     }
 
 
-    BlockDriverState *bs = blk_bs(conf->conf.blk);
+    bs = blk_bs(conf->conf.blk);
     if (bs->bl.zoned != BLK_Z_NONE) {
     if (bs->bl.zoned != BLK_Z_NONE) {
         virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
         virtio_add_feature(&s->host_features, VIRTIO_BLK_F_ZONED);
         if (bs->bl.zoned == BLK_Z_HM) {
         if (bs->bl.zoned == BLK_Z_HM) {
@@ -1996,19 +2035,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
         return;
         return;
     }
     }
 
 
-    if (conf->iothread_vq_mapping_list) {
-        if (conf->iothread) {
-            error_setg(errp, "iothread and iothread-vq-mapping properties "
-                             "cannot be set at the same time");
-            return;
-        }
-
-        if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list,
-                                               conf->num_queues, errp)) {
-            return;
-        }
-    }
-
     s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
     s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params,
                                             s->host_features);
                                             s->host_features);
     virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
     virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);

+ 34 - 29
hw/scsi/scsi-bus.c

@@ -120,17 +120,13 @@ static void scsi_device_for_each_req_async_bh(void *opaque)
     SCSIRequest *next;
     SCSIRequest *next;
 
 
     /*
     /*
-     * If the AioContext changed before this BH was called then reschedule into
-     * the new AioContext before accessing ->requests. This can happen when
-     * scsi_device_for_each_req_async() is called and then the AioContext is
-     * changed before BHs are run.
+     * The BB cannot have changed contexts between this BH being scheduled and
+     * now: BBs' AioContexts, when they have a node attached, can only be
+     * changed via bdrv_try_change_aio_context(), in a drained section.  While
+     * we have the in-flight counter incremented, that drain must block.
      */
      */
     ctx = blk_get_aio_context(s->conf.blk);
     ctx = blk_get_aio_context(s->conf.blk);
-    if (ctx != qemu_get_current_aio_context()) {
-        aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh,
-                                g_steal_pointer(&data));
-        return;
-    }
+    assert(ctx == qemu_get_current_aio_context());
 
 
     QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
     QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
         data->fn(req, data->fn_opaque);
         data->fn(req, data->fn_opaque);
@@ -138,11 +134,16 @@ static void scsi_device_for_each_req_async_bh(void *opaque)
 
 
     /* Drop the reference taken by scsi_device_for_each_req_async() */
     /* Drop the reference taken by scsi_device_for_each_req_async() */
     object_unref(OBJECT(s));
     object_unref(OBJECT(s));
+
+    /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */
+    blk_dec_in_flight(s->conf.blk);
 }
 }
 
 
 /*
 /*
  * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
  * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
  * runs in the AioContext that is executing the request.
  * runs in the AioContext that is executing the request.
+ * Keeps the BlockBackend's in-flight counter incremented until everything is
+ * done, so draining it will settle all scheduled @fn() calls.
  */
  */
 static void scsi_device_for_each_req_async(SCSIDevice *s,
 static void scsi_device_for_each_req_async(SCSIDevice *s,
                                            void (*fn)(SCSIRequest *, void *),
                                            void (*fn)(SCSIRequest *, void *),
@@ -163,6 +164,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s,
      */
      */
     object_ref(OBJECT(s));
     object_ref(OBJECT(s));
 
 
+    /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */
+    blk_inc_in_flight(s->conf.blk);
     aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
     aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
                             scsi_device_for_each_req_async_bh,
                             scsi_device_for_each_req_async_bh,
                             data);
                             data);
@@ -373,15 +376,13 @@ static void scsi_qdev_unrealize(DeviceState *qdev)
 
 
 /* handle legacy '-drive if=scsi,...' cmd line args */
 /* handle legacy '-drive if=scsi,...' cmd line args */
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
-                                      int unit, bool removable, int bootindex,
-                                      bool share_rw,
-                                      BlockdevOnError rerror,
-                                      BlockdevOnError werror,
+                                      int unit, bool removable, BlockConf *conf,
                                       const char *serial, Error **errp)
                                       const char *serial, Error **errp)
 {
 {
     const char *driver;
     const char *driver;
     char *name;
     char *name;
     DeviceState *dev;
     DeviceState *dev;
+    SCSIDevice *s;
     DriveInfo *dinfo;
     DriveInfo *dinfo;
 
 
     if (blk_is_sg(blk)) {
     if (blk_is_sg(blk)) {
@@ -399,11 +400,10 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
     object_property_add_child(OBJECT(bus), name, OBJECT(dev));
     object_property_add_child(OBJECT(bus), name, OBJECT(dev));
     g_free(name);
     g_free(name);
 
 
+    s = SCSI_DEVICE(dev);
+    s->conf = *conf;
+
     qdev_prop_set_uint32(dev, "scsi-id", unit);
     qdev_prop_set_uint32(dev, "scsi-id", unit);
-    if (bootindex >= 0) {
-        object_property_set_int(OBJECT(dev), "bootindex", bootindex,
-                                &error_abort);
-    }
     if (object_property_find(OBJECT(dev), "removable")) {
     if (object_property_find(OBJECT(dev), "removable")) {
         qdev_prop_set_bit(dev, "removable", removable);
         qdev_prop_set_bit(dev, "removable", removable);
     }
     }
@@ -414,19 +414,12 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
         object_unparent(OBJECT(dev));
         object_unparent(OBJECT(dev));
         return NULL;
         return NULL;
     }
     }
-    if (!object_property_set_bool(OBJECT(dev), "share-rw", share_rw, errp)) {
-        object_unparent(OBJECT(dev));
-        return NULL;
-    }
-
-    qdev_prop_set_enum(dev, "rerror", rerror);
-    qdev_prop_set_enum(dev, "werror", werror);
 
 
     if (!qdev_realize_and_unref(dev, &bus->qbus, errp)) {
     if (!qdev_realize_and_unref(dev, &bus->qbus, errp)) {
         object_unparent(OBJECT(dev));
         object_unparent(OBJECT(dev));
         return NULL;
         return NULL;
     }
     }
-    return SCSI_DEVICE(dev);
+    return s;
 }
 }
 
 
 void scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
 void scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
@@ -434,6 +427,12 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
     Location loc;
     Location loc;
     DriveInfo *dinfo;
     DriveInfo *dinfo;
     int unit;
     int unit;
+    BlockConf conf = {
+        .bootindex = -1,
+        .share_rw = false,
+        .rerror = BLOCKDEV_ON_ERROR_AUTO,
+        .werror = BLOCKDEV_ON_ERROR_AUTO,
+    };
 
 
     loc_push_none(&loc);
     loc_push_none(&loc);
     for (unit = 0; unit <= bus->info->max_target; unit++) {
     for (unit = 0; unit <= bus->info->max_target; unit++) {
@@ -443,10 +442,7 @@ void scsi_bus_legacy_handle_cmdline(SCSIBus *bus)
         }
         }
         qemu_opts_loc_restore(dinfo->opts);
         qemu_opts_loc_restore(dinfo->opts);
         scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo),
         scsi_bus_legacy_add_drive(bus, blk_by_legacy_dinfo(dinfo),
-                                  unit, false, -1, false,
-                                  BLOCKDEV_ON_ERROR_AUTO,
-                                  BLOCKDEV_ON_ERROR_AUTO,
-                                  NULL, &error_fatal);
+                                  unit, false, &conf, NULL, &error_fatal);
     }
     }
     loc_pop(&loc);
     loc_pop(&loc);
 }
 }
@@ -1728,11 +1724,20 @@ static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque)
     scsi_req_cancel_async(req, NULL);
     scsi_req_cancel_async(req, NULL);
 }
 }
 
 
+/**
+ * Cancel all requests, and block until they are deleted.
+ */
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
 {
 {
     scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
     scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
 
 
+    /*
+     * Await all the scsi_device_purge_one_req() calls scheduled by
+     * scsi_device_for_each_req_async(), and all I/O requests that were
+     * cancelled this way, but may still take a bit of time to settle.
+     */
     blk_drain(sdev->conf.blk);
     blk_drain(sdev->conf.blk);
+
     scsi_device_set_ua(sdev, sense);
     scsi_device_set_ua(sdev, sense);
 }
 }
 
 

+ 6 - 1
hw/scsi/virtio-scsi.c

@@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
 static void virtio_scsi_drained_end(SCSIBus *bus)
 static void virtio_scsi_drained_end(SCSIBus *bus)
 {
 {
     VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
     VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
     uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
                             s->parent_obj.conf.num_queues;
                             s->parent_obj.conf.num_queues;
@@ -1166,7 +1167,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus)
 
 
     for (uint32_t i = 0; i < total_queues; i++) {
     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
         VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        if (vq == vs->event_vq) {
+            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+        } else {
+            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        }
     }
     }
 }
 }
 
 

+ 1 - 4
hw/usb/dev-storage-classic.c

@@ -67,10 +67,7 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp)
     scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev),
     scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev),
                  &usb_msd_scsi_info_storage);
                  &usb_msd_scsi_info_storage);
     scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable,
     scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable,
-                                         s->conf.bootindex, s->conf.share_rw,
-                                         s->conf.rerror, s->conf.werror,
-                                         dev->serial,
-                                         errp);
+                                         &s->conf, dev->serial, errp);
     blk_unref(blk);
     blk_unref(blk);
     if (!scsi_dev) {
     if (!scsi_dev) {
         return;
         return;

+ 42 - 0
hw/virtio/virtio.c

@@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
 
 
 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
 {
+    /*
+     * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
+     * Re-enable them.  (And if detach has not been used before, notifications
+     * being enabled is still the default state while a notifier is attached;
+     * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
+     * notifications enabled once the polling section is left.)
+     */
+    if (!virtio_queue_get_notification(vq)) {
+        virtio_queue_set_notification(vq, 1);
+    }
+
     aio_set_event_notifier(ctx, &vq->host_notifier,
     aio_set_event_notifier(ctx, &vq->host_notifier,
                            virtio_queue_host_notifier_read,
                            virtio_queue_host_notifier_read,
                            virtio_queue_host_notifier_aio_poll,
                            virtio_queue_host_notifier_aio_poll,
@@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
     aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                 virtio_queue_host_notifier_aio_poll_begin,
                                 virtio_queue_host_notifier_aio_poll_begin,
                                 virtio_queue_host_notifier_aio_poll_end);
                                 virtio_queue_host_notifier_aio_poll_end);
+
+    /*
+     * We will have ignored notifications about new requests from the guest
+     * while no notifiers were attached, so "kick" the virt queue to process
+     * those requests now.
+     */
+    event_notifier_set(&vq->host_notifier);
 }
 }
 
 
 /*
 /*
@@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
  */
  */
 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
 {
 {
+    /* See virtio_queue_aio_attach_host_notifier() */
+    if (!virtio_queue_get_notification(vq)) {
+        virtio_queue_set_notification(vq, 1);
+    }
+
     aio_set_event_notifier(ctx, &vq->host_notifier,
     aio_set_event_notifier(ctx, &vq->host_notifier,
                            virtio_queue_host_notifier_read,
                            virtio_queue_host_notifier_read,
                            NULL, NULL);
                            NULL, NULL);
+
+    /*
+     * See virtio_queue_aio_attach_host_notifier().
+     * Note that this may be unnecessary for the type of virtqueues this
+     * function is used for.  Still, it will not hurt to have a quick look into
+     * whether we can/should process any of the virtqueue elements.
+     */
+    event_notifier_set(&vq->host_notifier);
 }
 }
 
 
 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
 {
     aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
     aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
+
+    /*
+     * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
+     * will run after io_poll_begin(), so by removing the notifier, we do not
+     * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
+     * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
+     * notifications are enabled or disabled.  It does not really matter anyway;
+     * we just removed the notifier, so we do not care about notifications until
+     * we potentially re-attach it.  The attach_host_notifier functions will
+     * ensure that notifications are enabled again when they are needed.
+     */
 }
 }
 
 
 void virtio_queue_host_notifier_read(EventNotifier *n)
 void virtio_queue_host_notifier_read(EventNotifier *n)

+ 6 - 1
include/block/aio.h

@@ -480,9 +480,14 @@ void aio_set_event_notifier(AioContext *ctx,
                             AioPollFn *io_poll,
                             AioPollFn *io_poll,
                             EventNotifierHandler *io_poll_ready);
                             EventNotifierHandler *io_poll_ready);
 
 
-/* Set polling begin/end callbacks for an event notifier that has already been
+/*
+ * Set polling begin/end callbacks for an event notifier that has already been
  * registered with aio_set_event_notifier.  Do nothing if the event notifier is
  * registered with aio_set_event_notifier.  Do nothing if the event notifier is
  * not registered.
  * not registered.
+ *
+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
+ * during polling, it will not be called, so an io_poll_begin() is not
+ * necessarily always followed by an io_poll_end().
  */
  */
 void aio_set_event_notifier_poll(AioContext *ctx,
 void aio_set_event_notifier_poll(AioContext *ctx,
                                  EventNotifier *notifier,
                                  EventNotifier *notifier,

+ 1 - 4
include/hw/scsi/scsi.h

@@ -199,10 +199,7 @@ static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d)
 }
 }
 
 
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
 SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
-                                      int unit, bool removable, int bootindex,
-                                      bool share_rw,
-                                      BlockdevOnError rerror,
-                                      BlockdevOnError werror,
+                                      int unit, bool removable, BlockConf *conf,
                                       const char *serial, Error **errp);
                                       const char *serial, Error **errp);
 void scsi_bus_set_ua(SCSIBus *bus, SCSISense sense);
 void scsi_bus_set_ua(SCSIBus *bus, SCSISense sense);
 void scsi_bus_legacy_handle_cmdline(SCSIBus *bus);
 void scsi_bus_legacy_handle_cmdline(SCSIBus *bus);

+ 1 - 1
include/hw/virtio/virtio-blk.h

@@ -55,7 +55,7 @@ struct VirtIOBlock {
     VirtIODevice parent_obj;
     VirtIODevice parent_obj;
     BlockBackend *blk;
     BlockBackend *blk;
     QemuMutex rq_lock;
     QemuMutex rq_lock;
-    void *rq; /* protected by rq_lock */
+    struct VirtIOBlockReq *rq; /* protected by rq_lock */
     VirtIOBlkConf conf;
     VirtIOBlkConf conf;
     unsigned short sector_mask;
     unsigned short sector_mask;
     bool original_wce;
     bool original_wce;

+ 2 - 5
qapi/qmp-dispatch.c

@@ -212,8 +212,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
              * executing the command handler so that it can make progress if it
              * executing the command handler so that it can make progress if it
              * involves an AIO_WAIT_WHILE().
              * involves an AIO_WAIT_WHILE().
              */
              */
-            aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
-            qemu_coroutine_yield();
+            aio_co_reschedule_self(qemu_get_aio_context());
         }
         }
 
 
         monitor_set_cur(qemu_coroutine_self(), cur_mon);
         monitor_set_cur(qemu_coroutine_self(), cur_mon);
@@ -227,9 +226,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ
              * Move back to iohandler_ctx so that nested event loops for
              * Move back to iohandler_ctx so that nested event loops for
              * qemu_aio_context don't start new monitor commands.
              * qemu_aio_context don't start new monitor commands.
              */
              */
-            aio_co_schedule(iohandler_get_aio_context(),
-                            qemu_coroutine_self());
-            qemu_coroutine_yield();
+            aio_co_reschedule_self(iohandler_get_aio_context());
         }
         }
     } else {
     } else {
        /*
        /*

+ 2 - 1
tests/qemu-iotests/check

@@ -184,7 +184,8 @@ if __name__ == '__main__':
         sys.exit(str(e))
         sys.exit(str(e))
 
 
     if args.dry_run:
     if args.dry_run:
-        print('\n'.join([os.path.basename(t) for t in tests]))
+        with env:
+            print('\n'.join([os.path.basename(t) for t in tests]))
     else:
     else:
         with TestRunner(env, tap=args.tap,
         with TestRunner(env, tap=args.tap,
                         color=args.color) as tr:
                         color=args.color) as tr:

+ 1 - 1
tests/qemu-iotests/testenv.py

@@ -126,7 +126,7 @@ def init_directories(self) -> None:
             self.tmp_sock_dir = False
             self.tmp_sock_dir = False
             Path(self.sock_dir).mkdir(parents=True, exist_ok=True)
             Path(self.sock_dir).mkdir(parents=True, exist_ok=True)
         except KeyError:
         except KeyError:
-            self.sock_dir = tempfile.mkdtemp()
+            self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-")
             self.tmp_sock_dir = True
             self.tmp_sock_dir = True
 
 
         self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR',
         self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR',