Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

Block layer patches

- virtio-blk: use blk_io_plug_call() instead of notification BH
- mirror: allow switching from background to active mode
- qemu-img rebase: add compression support
- Fix locking in media change monitor commands
- Fix a few blockjob-related deadlocks when using iothread

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmVBTkERHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9ZiqRAAqvsWbblmEGJ7TBKYQK3f8QshJ66RxzbC
# 4eSjKHrciWNTeeIeU8r8OvFcPPoTcPXxpcmasD2gsAxG5W5N8vkPbBkW+YT4YdDJ
# pWJXrbJ15nILC4DmnR1ARVtvxKgv9zy5LSm5bjss1K+OSYJl/nx+ILjmfVZnYDF7
# z1dP/G0JxKKm4JzAIdBE3uZS+6Q5kx/wGYlJv8EQmlH3DYfsJfy6Lthe9jfw8ijg
# lSqLoQ+D0lEd6Bk4XbkUqqBxFcYBWTfU6qPZoyIO94zCTwTG9yIjmoivxmmfwQZq
# cJUTGGZjcxpJYnvcC6P13WgcWBtcD9L2kYFVH0JyjpwcSg9cCGHMF66n9pSlyEGq
# DUikwVzbTwOotwzYQyM88v4ET+2+Qdcwn8pRbv9PllEczh0kAsUAEuxSgtz4NEcN
# bZrap/16xHFybNOKkMZcmpqxspT5NXKbDODUP0IvbSYMOYpWS983nBTxwMRpyHog
# 2TFDZu4DjNiPkI2BcYM5VOKk6diNowZFShcEKvoaOLX/n9EBhP0tjoH9VUn1800F
# myHrhF2jpIf9GhErMWB7N2W3/0aK0pqdQgbpVnd1ARDdIdYkr7G/S+50D9K80b6n
# 0q2E7br4S5bcsY0HQzBL9YARSayY+lVOssLoolCWEsYzijdBQmAvs5THajFKcism
# /idI6nlp2Vs=
# =RdxS
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 01 Nov 2023 03:58:09 JST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (27 commits)
  iotests: add test for changing mirror's copy_mode
  mirror: return mirror-specific information upon query
  blockjob: query driver-specific info via a new 'query' driver method
  qapi/block-core: turn BlockJobInfo into a union
  qapi/block-core: use JobType for BlockJobInfo's type
  mirror: implement mirror_change method
  block/mirror: determine copy_to_target only once
  block/mirror: move dirty bitmap to filter
  block/mirror: set actively_synced even after the job is ready
  blockjob: introduce block-job-change QMP command
  virtio-blk: remove batch notification BH
  virtio: use defer_call() in virtio_irqfd_notify()
  util/defer-call: move defer_call() to util/
  block: rename blk_io_plug_call() API to defer_call()
  blockdev: mirror: avoid potential deadlock when using iothread
  block: avoid potential deadlock during bdrv_graph_wrlock() in bdrv_close()
  blockjob: drop AioContext lock before calling bdrv_graph_wrlock()
  iotests: Test media change with iothreads
  block: Fix locking in media change monitor commands
  iotests: add tests for "qemu-img rebase" with compression
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Stefan Hajnoczi committed 1 year ago
commit 6c9ae1ce82

+ 2 - 1
MAINTAINERS

@@ -2755,12 +2755,13 @@ S: Supported
 F: util/async.c
 F: util/aio-*.c
 F: util/aio-*.h
+F: util/defer-call.c
 F: util/fdmon-*.c
 F: block/io.c
-F: block/plug.c
 F: migration/block*
 F: include/block/aio.h
 F: include/block/aio-wait.h
+F: include/qemu/defer-call.h
 F: scripts/qemugdb/aio.py
 F: tests/unit/test-fdmon-epoll.c
 T: git https://github.com/stefanha/qemu.git block

+ 1 - 1
block.c

@@ -5200,7 +5200,7 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv = NULL;
     }
 
-    bdrv_graph_wrlock(NULL);
+    bdrv_graph_wrlock(bs);
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
         bdrv_unref_child(bs, child);
     }

+ 5 - 4
block/blkio.c

@@ -13,6 +13,7 @@
 #include "block/block_int.h"
 #include "exec/memory.h"
 #include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
+#include "qemu/defer-call.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qapi/qmp/qdict.h"
@@ -312,10 +313,10 @@ static void blkio_detach_aio_context(BlockDriverState *bs)
 }
 
 /*
- * Called by blk_io_unplug() or immediately if not plugged. Called without
- * blkio_lock.
+ * Called by defer_call_end() or immediately if not in a deferred section.
+ * Called without blkio_lock.
  */
-static void blkio_unplug_fn(void *opaque)
+static void blkio_deferred_fn(void *opaque)
 {
     BDRVBlkioState *s = opaque;
 
@@ -332,7 +333,7 @@ static void blkio_submit_io(BlockDriverState *bs)
 {
     BDRVBlkioState *s = bs->opaque;
 
-    blk_io_plug_call(blkio_unplug_fn, s);
+    defer_call(blkio_deferred_fn, s);
 }
 
 static int coroutine_fn

+ 9 - 2
block/io_uring.c

@@ -15,6 +15,7 @@
 #include "block/block.h"
 #include "block/raw-aio.h"
 #include "qemu/coroutine.h"
+#include "qemu/defer-call.h"
 #include "qapi/error.h"
 #include "sysemu/block-backend.h"
 #include "trace.h"
@@ -124,6 +125,9 @@ static void luring_process_completions(LuringState *s)
 {
     struct io_uring_cqe *cqes;
     int total_bytes;
+
+    defer_call_begin();
+
     /*
      * Request completion callbacks can run the nested event loop.
      * Schedule ourselves so the nested event loop will "see" remaining
@@ -216,7 +220,10 @@ end:
             aio_co_wake(luringcb->co);
         }
     }
+
     qemu_bh_cancel(s->completion_bh);
+
+    defer_call_end();
 }
 
 static int ioq_submit(LuringState *s)
@@ -306,7 +313,7 @@ static void ioq_init(LuringQueue *io_q)
     io_q->blocked = false;
 }
 
-static void luring_unplug_fn(void *opaque)
+static void luring_deferred_fn(void *opaque)
 {
     LuringState *s = opaque;
     trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue,
@@ -367,7 +374,7 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
             return ret;
         }
 
-        blk_io_plug_call(luring_unplug_fn, s);
+        defer_call(luring_deferred_fn, s);
     }
     return 0;
 }

+ 7 - 2
block/linux-aio.c

@@ -14,6 +14,7 @@
 #include "block/raw-aio.h"
 #include "qemu/event_notifier.h"
 #include "qemu/coroutine.h"
+#include "qemu/defer-call.h"
 #include "qapi/error.h"
 #include "sysemu/block-backend.h"
 
@@ -204,6 +205,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 {
     struct io_event *events;
 
+    defer_call_begin();
+
     /* Reschedule so nested event loops see currently pending completions */
     qemu_bh_schedule(s->completion_bh);
 
@@ -230,6 +233,8 @@ static void qemu_laio_process_completions(LinuxAioState *s)
      * own `for` loop.  If we are the last all counters dropped to zero. */
     s->event_max = 0;
     s->event_idx = 0;
+
+    defer_call_end();
 }
 
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
@@ -353,7 +358,7 @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
     return max_batch;
 }
 
-static void laio_unplug_fn(void *opaque)
+static void laio_deferred_fn(void *opaque)
 {
     LinuxAioState *s = opaque;
 
@@ -393,7 +398,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
         if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
             ioq_submit(s);
         } else {
-            blk_io_plug_call(laio_unplug_fn, s);
+            defer_call(laio_deferred_fn, s);
         }
     }
 

+ 0 - 1
block/meson.build

@@ -21,7 +21,6 @@ block_ss.add(files(
   'mirror.c',
   'nbd.c',
   'null.c',
-  'plug.c',
   'preallocate.c',
   'progress_meter.c',
   'qapi.c',

+ 94 - 37
block/mirror.c

@@ -55,10 +55,18 @@ typedef struct MirrorBlockJob {
     BlockMirrorBackingMode backing_mode;
     /* Whether the target image requires explicit zero-initialization */
     bool zero_target;
+    /*
+     * To be accessed with atomics. Written only under the BQL (required by the
+     * current implementation of mirror_change()).
+     */
     MirrorCopyMode copy_mode;
     BlockdevOnError on_source_error, on_target_error;
-    /* Set when the target is synced (dirty bitmap is clean, nothing
-     * in flight) and the job is running in active mode */
+    /*
+     * To be accessed with atomics.
+     *
+     * Set when the target is synced (dirty bitmap is clean, nothing in flight)
+     * and the job is running in active mode.
+     */
     bool actively_synced;
     bool should_complete;
     int64_t granularity;
@@ -122,7 +130,7 @@ typedef enum MirrorMethod {
 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                             int error)
 {
-    s->actively_synced = false;
+    qatomic_set(&s->actively_synced, false);
     if (read) {
         return block_job_error_action(&s->common, s->on_source_error,
                                       true, error);
@@ -962,7 +970,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
     if (s->bdev_length == 0) {
         /* Transition to the READY state and wait for complete. */
         job_transition_to_ready(&s->common.job);
-        s->actively_synced = true;
+        qatomic_set(&s->actively_synced, true);
         while (!job_cancel_requested(&s->common.job) && !s->should_complete) {
             job_yield(&s->common.job);
         }
@@ -1074,9 +1082,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
                  * the target in a consistent state.
                  */
                 job_transition_to_ready(&s->common.job);
-                if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) {
-                    s->actively_synced = true;
-                }
+            }
+            if (qatomic_read(&s->copy_mode) != MIRROR_COPY_MODE_BACKGROUND) {
+                qatomic_set(&s->actively_synced, true);
             }
 
             should_complete = s->should_complete ||
@@ -1246,6 +1254,48 @@ static bool commit_active_cancel(Job *job, bool force)
     return force || !job_is_ready(job);
 }
 
+static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts,
+                          Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    BlockJobChangeOptionsMirror *change_opts = &opts->u.mirror;
+    MirrorCopyMode current;
+
+    /*
+     * The implementation relies on the fact that copy_mode is only written
+     * under the BQL. Otherwise, further synchronization would be required.
+     */
+
+    GLOBAL_STATE_CODE();
+
+    if (qatomic_read(&s->copy_mode) == change_opts->copy_mode) {
+        return;
+    }
+
+    if (change_opts->copy_mode != MIRROR_COPY_MODE_WRITE_BLOCKING) {
+        error_setg(errp, "Change to copy mode '%s' is not implemented",
+                   MirrorCopyMode_str(change_opts->copy_mode));
+        return;
+    }
+
+    current = qatomic_cmpxchg(&s->copy_mode, MIRROR_COPY_MODE_BACKGROUND,
+                              change_opts->copy_mode);
+    if (current != MIRROR_COPY_MODE_BACKGROUND) {
+        error_setg(errp, "Expected current copy mode '%s', got '%s'",
+                   MirrorCopyMode_str(MIRROR_COPY_MODE_BACKGROUND),
+                   MirrorCopyMode_str(current));
+    }
+}
+
+static void mirror_query(BlockJob *job, BlockJobInfo *info)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    info->u.mirror = (BlockJobInfoMirror) {
+        .actively_synced = qatomic_read(&s->actively_synced),
+    };
+}
+
 static const BlockJobDriver mirror_job_driver = {
     .job_driver = {
         .instance_size          = sizeof(MirrorBlockJob),
@@ -1260,6 +1310,8 @@ static const BlockJobDriver mirror_job_driver = {
         .cancel                 = mirror_cancel,
     },
     .drained_poll           = mirror_drained_poll,
+    .change                 = mirror_change,
+    .query                  = mirror_query,
 };
 
 static const BlockJobDriver commit_active_job_driver = {
@@ -1378,7 +1430,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
         bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
         bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
                               bitmap_end - bitmap_offset);
-        job->actively_synced = false;
+        qatomic_set(&job->actively_synced, false);
 
         action = mirror_error_action(job, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT) {
@@ -1437,7 +1489,8 @@ static void coroutine_fn GRAPH_RDLOCK active_write_settle(MirrorOp *op)
     uint64_t end_chunk = DIV_ROUND_UP(op->offset + op->bytes,
                                       op->s->granularity);
 
-    if (!--op->s->in_active_write_counter && op->s->actively_synced) {
+    if (!--op->s->in_active_write_counter &&
+        qatomic_read(&op->s->actively_synced)) {
         BdrvChild *source = op->s->mirror_top_bs->backing;
 
         if (QLIST_FIRST(&source->bs->parents) == source &&
@@ -1463,21 +1516,21 @@ bdrv_mirror_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
 }
 
+static bool should_copy_to_target(MirrorBDSOpaque *s)
+{
+    return s->job && s->job->ret >= 0 &&
+        !job_is_cancelled(&s->job->common.job) &&
+        qatomic_read(&s->job->copy_mode) == MIRROR_COPY_MODE_WRITE_BLOCKING;
+}
+
 static int coroutine_fn GRAPH_RDLOCK
 bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
-                         uint64_t offset, uint64_t bytes, QEMUIOVector *qiov,
-                         int flags)
+                         bool copy_to_target, uint64_t offset, uint64_t bytes,
+                         QEMUIOVector *qiov, int flags)
 {
     MirrorOp *op = NULL;
     MirrorBDSOpaque *s = bs->opaque;
     int ret = 0;
-    bool copy_to_target = false;
-
-    if (s->job) {
-        copy_to_target = s->job->ret >= 0 &&
-                         !job_is_cancelled(&s->job->common.job) &&
-                         s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
-    }
 
     if (copy_to_target) {
         op = active_write_prepare(s->job, offset, bytes);
@@ -1500,6 +1553,11 @@ bdrv_mirror_top_do_write(BlockDriverState *bs, MirrorMethod method,
         abort();
     }
 
+    if (!copy_to_target && s->job && s->job->dirty_bitmap) {
+        qatomic_set(&s->job->actively_synced, false);
+        bdrv_set_dirty_bitmap(s->job->dirty_bitmap, offset, bytes);
+    }
+
     if (ret < 0) {
         goto out;
     }
@@ -1519,17 +1577,10 @@ static int coroutine_fn GRAPH_RDLOCK
 bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                         QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
-    MirrorBDSOpaque *s = bs->opaque;
     QEMUIOVector bounce_qiov;
     void *bounce_buf;
     int ret = 0;
-    bool copy_to_target = false;
-
-    if (s->job) {
-        copy_to_target = s->job->ret >= 0 &&
-                         !job_is_cancelled(&s->job->common.job) &&
-                         s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
-    }
+    bool copy_to_target = should_copy_to_target(bs->opaque);
 
     if (copy_to_target) {
         /* The guest might concurrently modify the data to write; but
@@ -1546,8 +1597,8 @@ bdrv_mirror_top_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
         flags &= ~BDRV_REQ_REGISTERED_BUF;
     }
 
-    ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
-                                   flags);
+    ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, copy_to_target,
+                                   offset, bytes, qiov, flags);
 
     if (copy_to_target) {
         qemu_iovec_destroy(&bounce_qiov);
@@ -1570,15 +1621,17 @@ static int coroutine_fn GRAPH_RDLOCK
 bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                               int64_t bytes, BdrvRequestFlags flags)
 {
-    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL,
-                                    flags);
+    bool copy_to_target = should_copy_to_target(bs->opaque);
+    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, copy_to_target,
+                                    offset, bytes, NULL, flags);
 }
 
 static int coroutine_fn GRAPH_RDLOCK
 bdrv_mirror_top_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
-    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes,
-                                    NULL, 0);
+    bool copy_to_target = should_copy_to_target(bs->opaque);
+    return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, copy_to_target,
+                                    offset, bytes, NULL, 0);
 }
 
 static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs)
@@ -1813,7 +1866,7 @@ static BlockJob *mirror_start_job(
     s->is_none_mode = is_none_mode;
     s->backing_mode = backing_mode;
     s->zero_target = zero_target;
-    s->copy_mode = copy_mode;
+    qatomic_set(&s->copy_mode, copy_mode);
     s->base = base;
     s->base_overlay = bdrv_find_overlay(bs, base);
     s->granularity = granularity;
@@ -1823,13 +1876,17 @@ static BlockJob *mirror_start_job(
         s->should_complete = true;
     }
 
-    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    s->dirty_bitmap = bdrv_create_dirty_bitmap(s->mirror_top_bs, granularity,
+                                               NULL, errp);
     if (!s->dirty_bitmap) {
         goto fail;
     }
-    if (s->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING) {
-        bdrv_disable_dirty_bitmap(s->dirty_bitmap);
-    }
+
+    /*
+     * The dirty bitmap is set by bdrv_mirror_top_do_write() when not in active
+     * mode.
+     */
+    bdrv_disable_dirty_bitmap(s->dirty_bitmap);
 
     ret = block_job_add_bdrv(&s->common, "source", bs, 0,
                              BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |

+ 2 - 2
block/monitor/block-hmp-cmds.c

@@ -846,7 +846,7 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict)
     }
 
     while (list) {
-        if (strcmp(list->value->type, "stream") == 0) {
+        if (list->value->type == JOB_TYPE_STREAM) {
             monitor_printf(mon, "Streaming device %s: Completed %" PRId64
                            " of %" PRId64 " bytes, speed limit %" PRId64
                            " bytes/s\n",
@@ -858,7 +858,7 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict)
             monitor_printf(mon, "Type %s, device %s: Completed %" PRId64
                            " of %" PRId64 " bytes, speed limit %" PRId64
                            " bytes/s\n",
-                           list->value->type,
+                           JobType_str(list->value->type),
                            list->value->device,
                            list->value->offset,
                            list->value->len,

+ 3 - 2
block/nvme.c

@@ -16,6 +16,7 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
+#include "qemu/defer-call.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
@@ -476,7 +477,7 @@ static void nvme_trace_command(const NvmeCmd *cmd)
     }
 }
 
-static void nvme_unplug_fn(void *opaque)
+static void nvme_deferred_fn(void *opaque)
 {
     NVMeQueuePair *q = opaque;
 
@@ -503,7 +504,7 @@ static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req,
     q->need_kick++;
     qemu_mutex_unlock(&q->lock);
 
-    blk_io_plug_call(nvme_unplug_fn, q);
+    defer_call(nvme_deferred_fn, q);
 }
 
 static void nvme_admin_cmd_sync_cb(void *opaque, int ret)

+ 0 - 159
block/plug.c

@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Block I/O plugging
- *
- * Copyright Red Hat.
- *
- * This API defers a function call within a blk_io_plug()/blk_io_unplug()
- * section, allowing multiple calls to batch up. This is a performance
- * optimization that is used in the block layer to submit several I/O requests
- * at once instead of individually:
- *
- *   blk_io_plug(); <-- start of plugged region
- *   ...
- *   blk_io_plug_call(my_func, my_obj); <-- deferred my_func(my_obj) call
- *   blk_io_plug_call(my_func, my_obj); <-- another
- *   blk_io_plug_call(my_func, my_obj); <-- another
- *   ...
- *   blk_io_unplug(); <-- end of plugged region, my_func(my_obj) is called once
- *
- * This code is actually generic and not tied to the block layer. If another
- * subsystem needs this functionality, it could be renamed.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/coroutine-tls.h"
-#include "qemu/notify.h"
-#include "qemu/thread.h"
-#include "sysemu/block-backend.h"
-
-/* A function call that has been deferred until unplug() */
-typedef struct {
-    void (*fn)(void *);
-    void *opaque;
-} UnplugFn;
-
-/* Per-thread state */
-typedef struct {
-    unsigned count;       /* how many times has plug() been called? */
-    GArray *unplug_fns;   /* functions to call at unplug time */
-} Plug;
-
-/* Use get_ptr_plug() to fetch this thread-local value */
-QEMU_DEFINE_STATIC_CO_TLS(Plug, plug);
-
-/* Called at thread cleanup time */
-static void blk_io_plug_atexit(Notifier *n, void *value)
-{
-    Plug *plug = get_ptr_plug();
-    g_array_free(plug->unplug_fns, TRUE);
-}
-
-/* This won't involve coroutines, so use __thread */
-static __thread Notifier blk_io_plug_atexit_notifier;
-
-/**
- * blk_io_plug_call:
- * @fn: a function pointer to be invoked
- * @opaque: a user-defined argument to @fn()
- *
- * Call @fn(@opaque) immediately if not within a blk_io_plug()/blk_io_unplug()
- * section.
- *
- * Otherwise defer the call until the end of the outermost
- * blk_io_plug()/blk_io_unplug() section in this thread. If the same
- * @fn/@opaque pair has already been deferred, it will only be called once upon
- * blk_io_unplug() so that accumulated calls are batched into a single call.
- *
- * The caller must ensure that @opaque is not freed before @fn() is invoked.
- */
-void blk_io_plug_call(void (*fn)(void *), void *opaque)
-{
-    Plug *plug = get_ptr_plug();
-
-    /* Call immediately if we're not plugged */
-    if (plug->count == 0) {
-        fn(opaque);
-        return;
-    }
-
-    GArray *array = plug->unplug_fns;
-    if (!array) {
-        array = g_array_new(FALSE, FALSE, sizeof(UnplugFn));
-        plug->unplug_fns = array;
-        blk_io_plug_atexit_notifier.notify = blk_io_plug_atexit;
-        qemu_thread_atexit_add(&blk_io_plug_atexit_notifier);
-    }
-
-    UnplugFn *fns = (UnplugFn *)array->data;
-    UnplugFn new_fn = {
-        .fn = fn,
-        .opaque = opaque,
-    };
-
-    /*
-     * There won't be many, so do a linear search. If this becomes a bottleneck
-     * then a binary search (glib 2.62+) or different data structure could be
-     * used.
-     */
-    for (guint i = 0; i < array->len; i++) {
-        if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) {
-            return; /* already exists */
-        }
-    }
-
-    g_array_append_val(array, new_fn);
-}
-
-/**
- * blk_io_plug: Defer blk_io_plug_call() functions until blk_io_unplug()
- *
- * blk_io_plug/unplug are thread-local operations. This means that multiple
- * threads can simultaneously call plug/unplug, but the caller must ensure that
- * each unplug() is called in the same thread of the matching plug().
- *
- * Nesting is supported. blk_io_plug_call() functions are only called at the
- * outermost blk_io_unplug().
- */
-void blk_io_plug(void)
-{
-    Plug *plug = get_ptr_plug();
-
-    assert(plug->count < UINT32_MAX);
-
-    plug->count++;
-}
-
-/**
- * blk_io_unplug: Run any pending blk_io_plug_call() functions
- *
- * There must have been a matching blk_io_plug() call in the same thread prior
- * to this blk_io_unplug() call.
- */
-void blk_io_unplug(void)
-{
-    Plug *plug = get_ptr_plug();
-
-    assert(plug->count > 0);
-
-    if (--plug->count > 0) {
-        return;
-    }
-
-    GArray *array = plug->unplug_fns;
-    if (!array) {
-        return;
-    }
-
-    UnplugFn *fns = (UnplugFn *)array->data;
-
-    for (guint i = 0; i < array->len; i++) {
-        fns[i].fn(fns[i].opaque);
-    }
-
-    /*
-     * This resets the array without freeing memory so that appending is cheap
-     * in the future.
-     */
-    g_array_set_size(array, 0);
-}

+ 5 - 0
block/qapi-sysemu.c

@@ -237,6 +237,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
                                             BlockDriverState *bs, Error **errp)
 {
     Error *local_err = NULL;
+    AioContext *ctx;
     bool has_device;
     int ret;
 
@@ -258,7 +259,11 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
         return;
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = blk_insert_bs(blk, bs, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return;
     }

+ 26 - 2
blockdev.c

@@ -2968,6 +2968,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
 
     if (replaces) {
         BlockDriverState *to_replace_bs;
+        AioContext *aio_context;
         AioContext *replace_aio_context;
         int64_t bs_size, replace_size;
 
@@ -2982,10 +2983,19 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
             return;
         }
 
+        aio_context = bdrv_get_aio_context(bs);
         replace_aio_context = bdrv_get_aio_context(to_replace_bs);
-        aio_context_acquire(replace_aio_context);
+        /*
+         * bdrv_getlength() is a co-wrapper and uses AIO_WAIT_WHILE. Be sure not
+         * to acquire the same AioContext twice.
+         */
+        if (replace_aio_context != aio_context) {
+            aio_context_acquire(replace_aio_context);
+        }
         replace_size = bdrv_getlength(to_replace_bs);
-        aio_context_release(replace_aio_context);
+        if (replace_aio_context != aio_context) {
+            aio_context_release(replace_aio_context);
+        }
 
         if (replace_size < 0) {
             error_setg_errno(errp, -replace_size,
@@ -3382,6 +3392,20 @@ void qmp_block_job_dismiss(const char *id, Error **errp)
     job_dismiss_locked(&job, errp);
 }
 
+void qmp_block_job_change(BlockJobChangeOptions *opts, Error **errp)
+{
+    BlockJob *job;
+
+    JOB_LOCK_GUARD();
+    job = find_block_job_locked(opts->id, errp);
+
+    if (!job) {
+        return;
+    }
+
+    block_job_change_locked(job, opts, errp);
+}
+
 void qmp_change_backing_file(const char *device,
                              const char *image_node_name,
                              const char *backing_file,

+ 29 - 1
blockjob.c

@@ -198,7 +198,9 @@ void block_job_remove_all_bdrv(BlockJob *job)
      * one to make sure that such a concurrent access does not attempt
      * to process an already freed BdrvChild.
      */
+    aio_context_release(job->job.aio_context);
     bdrv_graph_wrlock(NULL);
+    aio_context_acquire(job->job.aio_context);
     while (job->nodes) {
         GSList *l = job->nodes;
         BdrvChild *c = l->data;
@@ -328,6 +330,26 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
     return block_job_set_speed_locked(job, speed, errp);
 }
 
+void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+                             Error **errp)
+{
+    const BlockJobDriver *drv = block_job_driver(job);
+
+    GLOBAL_STATE_CODE();
+
+    if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) {
+        return;
+    }
+
+    if (drv->change) {
+        job_unlock();
+        drv->change(job, opts, errp);
+        job_lock();
+    } else {
+        error_setg(errp, "Job type does not support change");
+    }
+}
+
 void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
 {
     IO_CODE();
@@ -356,6 +378,7 @@ BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
 {
     BlockJobInfo *info;
     uint64_t progress_current, progress_total;
+    const BlockJobDriver *drv = block_job_driver(job);
 
     GLOBAL_STATE_CODE();
 
@@ -368,7 +391,7 @@ BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
                           &progress_total);
 
     info = g_new0(BlockJobInfo, 1);
-    info->type      = g_strdup(job_type_str(&job->job));
+    info->type      = job_type(&job->job);
     info->device    = g_strdup(job->job.id);
     info->busy      = job->job.busy;
     info->paused    = job->job.pause_count > 0;
@@ -385,6 +408,11 @@ BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
                         g_strdup(error_get_pretty(job->job.err)) :
                         g_strdup(strerror(-job->job.ret));
     }
+    if (drv->query) {
+        job_unlock();
+        drv->query(job, info);
+        job_lock();
+    }
     return info;
 }
 

+ 4 - 2
docs/tools/qemu-img.rst

@@ -667,7 +667,7 @@ Command description:
 
   List, apply, create or delete snapshots in image *FILENAME*.
 
-.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME
+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] [-c] -b BACKING_FILE [-F BACKING_FMT] FILENAME
 
   Changes the backing file of an image. Only the formats ``qcow2`` and
   ``qed`` support changing the backing file.
@@ -694,7 +694,9 @@ Command description:
 
     In order to achieve this, any clusters that differ between
     *BACKING_FILE* and the old backing file of *FILENAME* are merged
-    into *FILENAME* before actually changing the backing file.
+    into *FILENAME* before actually changing the backing file. With the
+    ``-c`` option specified, the clusters which are being merged (but not
+    the entire *FILENAME* image) are compressed when written.
 
     Note that the safe mode is an expensive operation, comparable to
     converting an image. It only works if the old backing file still

+ 1 - 47
hw/block/dataplane/virtio-blk.c

@@ -31,9 +31,6 @@ struct VirtIOBlockDataPlane {
 
     VirtIOBlkConf *conf;
     VirtIODevice *vdev;
-    QEMUBH *bh;                     /* bh for guest notification */
-    unsigned long *batch_notify_vqs;
-    bool batch_notifications;
 
     /* Note that these EventNotifiers are assigned by value.  This is
      * fine as long as you do not call event_notifier_cleanup on them
@@ -47,36 +44,7 @@ struct VirtIOBlockDataPlane {
 /* Raise an interrupt to signal guest, if necessary */
 void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
 {
-    if (s->batch_notifications) {
-        set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
-        qemu_bh_schedule(s->bh);
-    } else {
-        virtio_notify_irqfd(s->vdev, vq);
-    }
-}
-
-static void notify_guest_bh(void *opaque)
-{
-    VirtIOBlockDataPlane *s = opaque;
-    unsigned nvqs = s->conf->num_queues;
-    unsigned long bitmap[BITS_TO_LONGS(nvqs)];
-    unsigned j;
-
-    memcpy(bitmap, s->batch_notify_vqs, sizeof(bitmap));
-    memset(s->batch_notify_vqs, 0, sizeof(bitmap));
-
-    for (j = 0; j < nvqs; j += BITS_PER_LONG) {
-        unsigned long bits = bitmap[j / BITS_PER_LONG];
-
-        while (bits != 0) {
-            unsigned i = j + ctzl(bits);
-            VirtQueue *vq = virtio_get_queue(s->vdev, i);
-
-            virtio_notify_irqfd(s->vdev, vq);
-
-            bits &= bits - 1; /* clear right-most bit */
-        }
-    }
+    virtio_notify_irqfd(s->vdev, vq);
 }
 
 /* Context: QEMU global mutex held */
@@ -126,9 +94,6 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
     } else {
         s->ctx = qemu_get_aio_context();
     }
-    s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
-                               &DEVICE(vdev)->mem_reentrancy_guard);
-    s->batch_notify_vqs = bitmap_new(conf->num_queues);
 
     *dataplane = s;
 
@@ -146,8 +111,6 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
 
     vblk = VIRTIO_BLK(s->vdev);
     assert(!vblk->dataplane_started);
-    g_free(s->batch_notify_vqs);
-    qemu_bh_delete(s->bh);
     if (s->iothread) {
         object_unref(OBJECT(s->iothread));
     }
@@ -173,12 +136,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 
     s->starting = true;
 
-    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
-        s->batch_notifications = true;
-    } else {
-        s->batch_notifications = false;
-    }
-
     /* Set up guest notifier (irq) */
     r = k->set_guest_notifiers(qbus->parent, nvqs, true);
     if (r != 0) {
@@ -370,9 +327,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 
     aio_context_release(s->ctx);
 
-    qemu_bh_cancel(s->bh);
-    notify_guest_bh(s); /* final chance to notify guest */
-
     /* Clean up guest notifier (irq) */
     k->set_guest_notifiers(qbus->parent, nvqs, false);
 

+ 6 - 5
hw/block/dataplane/xen-block.c

@@ -19,6 +19,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/defer-call.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/memalign.h"
@@ -509,7 +510,7 @@ static int xen_block_get_request(XenBlockDataPlane *dataplane,
 
 /*
  * Threshold of in-flight requests above which we will start using
- * blk_io_plug()/blk_io_unplug() to batch requests.
+ * defer_call_begin()/defer_call_end() to batch requests.
  */
 #define IO_PLUG_THRESHOLD 1
 
@@ -537,7 +538,7 @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
      * is below us.
      */
     if (inflight_atstart > IO_PLUG_THRESHOLD) {
-        blk_io_plug();
+        defer_call_begin();
     }
     while (rc != rp) {
         /* pull request from ring */
@@ -577,12 +578,12 @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
 
         if (inflight_atstart > IO_PLUG_THRESHOLD &&
             batched >= inflight_atstart) {
-            blk_io_unplug();
+            defer_call_end();
         }
         xen_block_do_aio(request);
         if (inflight_atstart > IO_PLUG_THRESHOLD) {
             if (batched >= inflight_atstart) {
-                blk_io_plug();
+                defer_call_begin();
                 batched = 0;
             } else {
                 batched++;
@@ -590,7 +591,7 @@ static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
         }
     }
     if (inflight_atstart > IO_PLUG_THRESHOLD) {
-        blk_io_unplug();
+        defer_call_end();
     }
 
     return done_something;

+ 3 - 2
hw/block/virtio-blk.c

@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/defer-call.h"
 #include "qapi/error.h"
 #include "qemu/iov.h"
 #include "qemu/module.h"
@@ -1134,7 +1135,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
     bool suppress_notifications = virtio_queue_get_notification(vq);
 
     aio_context_acquire(blk_get_aio_context(s->blk));
-    blk_io_plug();
+    defer_call_begin();
 
     do {
         if (suppress_notifications) {
@@ -1158,7 +1159,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
         virtio_blk_submit_multireq(s, &mrb);
     }
 
-    blk_io_unplug();
+    defer_call_end();
     aio_context_release(blk_get_aio_context(s->blk));
 }
 

+ 4 - 3
hw/scsi/virtio-scsi.c

@@ -18,6 +18,7 @@
 #include "standard-headers/linux/virtio_ids.h"
 #include "hw/virtio/virtio-scsi.h"
 #include "migration/qemu-file-types.h"
+#include "qemu/defer-call.h"
 #include "qemu/error-report.h"
 #include "qemu/iov.h"
 #include "qemu/module.h"
@@ -799,7 +800,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         return -ENOBUFS;
     }
     scsi_req_ref(req->sreq);
-    blk_io_plug();
+    defer_call_begin();
     object_unref(OBJECT(d));
     return 0;
 }
@@ -810,7 +811,7 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req)
     if (scsi_req_enqueue(sreq)) {
         scsi_req_continue(sreq);
     }
-    blk_io_unplug();
+    defer_call_end();
     scsi_req_unref(sreq);
 }
 
@@ -836,7 +837,7 @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
                 while (!QTAILQ_EMPTY(&reqs)) {
                     req = QTAILQ_FIRST(&reqs);
                     QTAILQ_REMOVE(&reqs, req, next);
-                    blk_io_unplug();
+                    defer_call_end();
                     scsi_req_unref(req->sreq);
                     virtqueue_detach_element(req->vq, &req->elem, 0);
                     virtio_scsi_free_req(req);

+ 1 - 0
hw/virtio/trace-events

@@ -73,6 +73,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "
 virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
 virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
 virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
+virtio_notify_irqfd_deferred_fn(void *vdev, void *vq) "vdev %p vq %p"
 virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
 virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
 virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"

+ 12 - 1
hw/virtio/virtio.c

@@ -15,6 +15,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-commands-virtio.h"
 #include "trace.h"
+#include "qemu/defer-call.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
@@ -2445,6 +2446,16 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
     }
 }
 
+/* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
+static void virtio_notify_irqfd_deferred_fn(void *opaque)
+{
+    EventNotifier *notifier = opaque;
+    VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);
+
+    trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
+    event_notifier_set(notifier);
+}
+
 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
 {
     WITH_RCU_READ_LOCK_GUARD() {
@@ -2471,7 +2482,7 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
      * to an atomic operation.
      */
     virtio_set_isr(vq->vdev, 0x1);
-    event_notifier_set(&vq->guest_notifier);
+    defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
 }
 
 static void virtio_irq(VirtQueue *vq)

+ 11 - 0
include/block/blockjob.h

@@ -172,6 +172,17 @@ bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs);
  */
 bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp);
 
+/**
+ * block_job_change_locked:
+ * @job: The job to change.
+ * @opts: The new options.
+ * @errp: Error object.
+ *
+ * Change the job according to opts.
+ */
+void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+                             Error **errp);
+
 /**
  * block_job_query_locked:
  * @job: The job to get information about.

+ 12 - 0
include/block/blockjob_int.h

@@ -67,6 +67,18 @@ struct BlockJobDriver {
     void (*attached_aio_context)(BlockJob *job, AioContext *new_context);
 
     void (*set_speed)(BlockJob *job, int64_t speed);
+
+    /*
+     * Change the @job's options according to @opts.
+     *
+     * Note that this can already be called before the job coroutine is running.
+     */
+    void (*change)(BlockJob *job, BlockJobChangeOptions *opts, Error **errp);
+
+    /*
+     * Query information specific to this kind of block job.
+     */
+    void (*query)(BlockJob *job, BlockJobInfo *info);
 };
 
 /*

+ 16 - 0
include/qemu/defer-call.h

@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Deferred calls
+ *
+ * Copyright Red Hat.
+ */
+
+#ifndef QEMU_DEFER_CALL_H
+#define QEMU_DEFER_CALL_H
+
+/* See documentation in util/defer-call.c */
+void defer_call_begin(void);
+void defer_call_end(void);
+void defer_call(void (*fn)(void *), void *opaque);
+
+#endif /* QEMU_DEFER_CALL_H */
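
The header only carries the declarations; the API documentation moves to util/defer-call.c, which is not part of this diff. As a rough, hypothetical caller-side sketch of the batching pattern (MyQueue, my_submit() and my_enqueue() are invented names for illustration; only the defer_call*() functions are real QEMU API):

/*
 * Hypothetical sketch of the defer_call() batching pattern. MyQueue,
 * my_submit() and my_enqueue() are made up; only defer_call*() is real.
 */
#include "qemu/osdep.h"
#include "qemu/defer-call.h"

typedef struct {
    unsigned queued;
} MyQueue;

/* Flushes everything queued so far in a single backend submission */
static void my_submit(void *opaque)
{
    MyQueue *q = opaque;
    /* ... submit q->queued requests at once ... */
    q->queued = 0;
}

static void my_enqueue(MyQueue *q)
{
    q->queued++;
    /*
     * Outside a deferred section this calls my_submit(q) immediately; inside
     * one, the fn/opaque pair is recorded once and run at the outermost
     * defer_call_end(), so repeated calls are batched into a single flush.
     */
    defer_call(my_submit, q);
}

static void my_process_batch(MyQueue *q, unsigned n)
{
    defer_call_begin();
    for (unsigned i = 0; i < n; i++) {
        my_enqueue(q);
    }
    defer_call_end(); /* my_submit(q) runs here exactly once */
}

This mirrors how the virtio-blk and virtio-scsi handlers further down in this series wrap request processing in defer_call_begin()/defer_call_end().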

+ 0 - 4
include/sysemu/block-backend-io.h

@@ -100,10 +100,6 @@ void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_get_max_iov(BlockBackend *blk);
 int blk_get_max_hw_iov(BlockBackend *blk);
 
-void blk_io_plug(void);
-void blk_io_unplug(void);
-void blk_io_plug_call(void (*fn)(void *), void *opaque);
-
 AioContext *blk_get_aio_context(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,

+ 1 - 0
job.c

@@ -80,6 +80,7 @@ bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = {
     [JOB_VERB_COMPLETE]             = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0},
     [JOB_VERB_FINALIZE]             = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
     [JOB_VERB_DISMISS]              = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
+    [JOB_VERB_CHANGE]               = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0},
 };
 
 /* Transactional group of jobs */

+ 56 - 3
qapi/block-core.json

@@ -1352,6 +1352,20 @@
 { 'enum': 'MirrorCopyMode',
   'data': ['background', 'write-blocking'] }
 
+##
+# @BlockJobInfoMirror:
+#
+# Information specific to mirror block jobs.
+#
+# @actively-synced: Whether the source is actively synced to the
+#     target, i.e. same data and new writes are done synchronously to
+#     both.
+#
+# Since: 8.2
+##
+{ 'struct': 'BlockJobInfoMirror',
+  'data': { 'actively-synced': 'bool' } }
+
 ##
 # @BlockJobInfo:
 #
@@ -1395,13 +1409,15 @@
 #
 # Since: 1.1
 ##
-{ 'struct': 'BlockJobInfo',
-  'data': {'type': 'str', 'device': 'str', 'len': 'int',
+{ 'union': 'BlockJobInfo',
+  'base': {'type': 'JobType', 'device': 'str', 'len': 'int',
            'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
            'io-status': 'BlockDeviceIoStatus', 'ready': 'bool',
            'status': 'JobStatus',
            'auto-finalize': 'bool', 'auto-dismiss': 'bool',
-           '*error': 'str' } }
+           '*error': 'str' },
+  'discriminator': 'type',
+  'data': { 'mirror': 'BlockJobInfoMirror' } }
 
 ##
 # @query-block-jobs:
@@ -3044,6 +3060,43 @@
 { 'command': 'block-job-finalize', 'data': { 'id': 'str' },
   'allow-preconfig': true }
 
+##
+# @BlockJobChangeOptionsMirror:
+#
+# @copy-mode: Switch to this copy mode.  Currently, only the switch
+#     from 'background' to 'write-blocking' is implemented.
+#
+# Since: 8.2
+##
+{ 'struct': 'BlockJobChangeOptionsMirror',
+  'data': { 'copy-mode' : 'MirrorCopyMode' } }
+
+##
+# @BlockJobChangeOptions:
+#
+# Block job options that can be changed after job creation.
+#
+# @id: The job identifier
+#
+# @type: The job type
+#
+# Since: 8.2
+##
+{ 'union': 'BlockJobChangeOptions',
+  'base': { 'id': 'str', 'type': 'JobType' },
+  'discriminator': 'type',
+  'data': { 'mirror': 'BlockJobChangeOptionsMirror' } }
+
+##
+# @block-job-change:
+#
+# Change the block job's options.
+#
+# Since: 8.2
+##
+{ 'command': 'block-job-change',
+  'data': 'BlockJobChangeOptions', 'boxed': true }
+
 ##
 # @BlockdevDiscardOptions:
 #

+ 3 - 1
qapi/job.json

@@ -105,11 +105,13 @@
 #
 # @finalize: see @job-finalize
 #
+# @change: see @block-job-change (since 8.2)
+#
 # Since: 2.12
 ##
 { 'enum': 'JobVerb',
   'data': ['cancel', 'pause', 'resume', 'set-speed', 'complete', 'dismiss',
-           'finalize' ] }
+           'finalize', 'change' ] }
 
 ##
 # @JOB_STATUS_CHANGE:

+ 2 - 2
qemu-img-cmds.hx

@@ -88,9 +88,9 @@ SRST
 ERST
 
 DEF("rebase", img_rebase,
-    "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+    "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] [-c] -b backing_file [-F backing_fmt] filename")
 SRST
-.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME
+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] [-c] -b BACKING_FILE [-F BACKING_FMT] FILENAME
 ERST
 
 DEF("resize", img_resize,

+ 103 - 33
qemu-img.c

@@ -1274,23 +1274,29 @@ static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
 }
 
 /*
- * Compares two buffers sector by sector. Returns 0 if the first
- * sector of each buffer matches, non-zero otherwise.
+ * Compares two buffers chunk by chunk, where @chsize is the chunk size.
+ * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
+ * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
  *
- * pnum is set to the sector-aligned size of the buffer prefix that
- * has the same matching status as the first sector.
+ * @pnum is set to the size of the buffer prefix aligned to @chsize that
+ * has the same matching status as the first chunk.
  */
 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
-                           int64_t bytes, int64_t *pnum)
+                           int64_t bytes, uint64_t chsize, int64_t *pnum)
 {
     bool res;
-    int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
+    int64_t i;
 
     assert(bytes > 0);
 
+    if (!chsize) {
+        chsize = BDRV_SECTOR_SIZE;
+    }
+    i = MIN(bytes, chsize);
+
     res = !!memcmp(buf1, buf2, i);
     while (i < bytes) {
-        int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
+        int64_t len = MIN(bytes - i, chsize);
 
         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
             break;
@@ -1559,7 +1565,7 @@ static int img_compare(int argc, char **argv)
                     ret = 4;
                     goto out;
                 }
-                ret = compare_buffers(buf1, buf2, chunk, &pnum);
+                ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
                 if (ret || pnum != chunk) {
                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                             offset + (ret ? 0 : pnum));
@@ -3524,16 +3530,20 @@ static int img_rebase(int argc, char **argv)
     uint8_t *buf_new = NULL;
     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
     BlockDriverState *unfiltered_bs;
+    BlockDriverInfo bdi = {0};
     char *filename;
     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
     int c, flags, src_flags, ret;
+    BdrvRequestFlags write_flags = 0;
     bool writethrough, src_writethrough;
     int unsafe = 0;
     bool force_share = false;
     int progress = 0;
     bool quiet = false;
+    bool compress = false;
     Error *local_err = NULL;
     bool image_opts = false;
+    int64_t write_align;
 
     /* Parse commandline parameters */
     fmt = NULL;
@@ -3547,9 +3557,10 @@ static int img_rebase(int argc, char **argv)
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {"force-share", no_argument, 0, 'U'},
+            {"compress", no_argument, 0, 'c'},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
+        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qUc",
                         long_options, NULL);
         if (c == -1) {
             break;
@@ -3597,6 +3608,9 @@ static int img_rebase(int argc, char **argv)
         case 'U':
             force_share = true;
             break;
+        case 'c':
+            compress = true;
+            break;
         }
     }
 
@@ -3649,6 +3663,14 @@ static int img_rebase(int argc, char **argv)
 
     unfiltered_bs = bdrv_skip_filters(bs);
 
+    if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
+        error_report("Compression not supported for this file format");
+        ret = -1;
+        goto out;
+    } else if (compress) {
+        write_flags |= BDRV_REQ_WRITE_COMPRESSED;
+    }
+
     if (out_basefmt != NULL) {
         if (bdrv_find_format(out_basefmt) == NULL) {
             error_report("Invalid format name: '%s'", out_basefmt);
@@ -3657,6 +3679,20 @@ static int img_rebase(int argc, char **argv)
         }
     }
 
+    /*
+     * We need overlay subcluster size (or cluster size in case writes are
+     * compressed) to make sure write requests are aligned.
+     */
+    ret = bdrv_get_info(unfiltered_bs, &bdi);
+    if (ret < 0) {
+        error_report("could not get block driver info");
+        goto out;
+    } else if (bdi.subcluster_size == 0) {
+        bdi.cluster_size = bdi.subcluster_size = 1;
+    }
+
+    write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
+
     /* For safe rebasing we need to compare old and new backing file */
     if (!unsafe) {
         QDict *options = NULL;
@@ -3756,11 +3792,16 @@ static int img_rebase(int argc, char **argv)
         int64_t old_backing_size = 0;
         int64_t new_backing_size = 0;
         uint64_t offset;
-        int64_t n;
+        int64_t n, n_old = 0, n_new = 0;
         float local_progress = 0;
 
-        buf_old = blk_blockalign(blk, IO_BUF_SIZE);
-        buf_new = blk_blockalign(blk, IO_BUF_SIZE);
+        if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
+            bdrv_opt_mem_align(blk_bs(blk))) {
+            buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
+        } else {
+            buf_old = blk_blockalign(blk, IO_BUF_SIZE);
+        }
+        buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
 
         size = blk_getlength(blk);
         if (size < 0) {
@@ -3797,7 +3838,8 @@ static int img_rebase(int argc, char **argv)
         }
 
         for (offset = 0; offset < size; offset += n) {
-            bool buf_old_is_zero = false;
+            bool old_backing_eof = false;
+            int64_t n_alloc;
 
             /* How many bytes can we handle with the next read? */
             n = MIN(IO_BUF_SIZE, size - offset);
@@ -3814,6 +3856,8 @@ static int img_rebase(int argc, char **argv)
             }
 
             if (prefix_chain_bs) {
+                uint64_t bytes = n;
+
                 /*
                  * If cluster wasn't changed since prefix_chain, we don't need
                  * to take action
@@ -3826,38 +3870,60 @@ static int img_rebase(int argc, char **argv)
                                  strerror(-ret));
                     goto out;
                 }
-                if (!ret) {
+                if (!ret && n) {
                     continue;
                 }
+                if (!n) {
+                    /*
+                     * If we've reached EOF of the old backing, it means that
+                     * offsets beyond the old backing size were read as zeroes.
+                     * Now we will need to explicitly zero the cluster in
+                     * order to preserve that state after the rebase.
+                     */
+                    n = bytes;
+                }
             }
 
+            /*
+             * At this point we know that the region [offset; offset + n)
+             * is unallocated within the target image.  This region might be
+             * unaligned to the target image's (sub)cluster boundaries, as
+             * old backing may have smaller clusters (or have subclusters).
+             * We extend it to the aligned boundaries to avoid CoW on
+             * partial writes in blk_pwrite().
+             */
+            n += offset - QEMU_ALIGN_DOWN(offset, write_align);
+            offset = QEMU_ALIGN_DOWN(offset, write_align);
+            n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
+            n = MIN(n, size - offset);
+            assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
+                   n_alloc == n);
+
+            /*
+             * Much like with the target image, we'll try to read as much
+             * of the old and new backings as we can.
+             */
+            n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
+            n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
+
             /*
              * Read old and new backing file and take into consideration that
              * backing files may be smaller than the COW image.
              */
-            if (offset >= old_backing_size) {
-                memset(buf_old, 0, n);
-                buf_old_is_zero = true;
+            memset(buf_old + n_old, 0, n - n_old);
+            if (!n_old) {
+                old_backing_eof = true;
             } else {
-                if (offset + n > old_backing_size) {
-                    n = old_backing_size - offset;
-                }
-
-                ret = blk_pread(blk_old_backing, offset, n, buf_old, 0);
+                ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
                 if (ret < 0) {
                     error_report("error while reading from old backing file");
                     goto out;
                 }
             }
 
-            if (offset >= new_backing_size || !blk_new_backing) {
-                memset(buf_new, 0, n);
-            } else {
-                if (offset + n > new_backing_size) {
-                    n = new_backing_size - offset;
-                }
-
-                ret = blk_pread(blk_new_backing, offset, n, buf_new, 0);
+            memset(buf_new + n_new, 0, n - n_new);
+            if (n_new) {
+                ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
                 if (ret < 0) {
                     error_report("error while reading from new backing file");
                     goto out;
@@ -3871,13 +3937,14 @@ static int img_rebase(int argc, char **argv)
                 int64_t pnum;
 
                 if (compare_buffers(buf_old + written, buf_new + written,
-                                    n - written, &pnum))
+                                    n - written, write_align, &pnum))
                 {
-                    if (buf_old_is_zero) {
+                    if (old_backing_eof) {
                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
                     } else {
+                        assert(written + pnum <= IO_BUF_SIZE);
                         ret = blk_pwrite(blk, offset + written, pnum,
-                                         buf_old + written, 0);
+                                         buf_old + written, write_flags);
                     }
                     if (ret < 0) {
                         error_report("Error while writing to COW image: %s",
@@ -3887,6 +3954,9 @@ static int img_rebase(int argc, char **argv)
                 }
 
                 written += pnum;
+                if (offset + written >= old_backing_size) {
+                    old_backing_eof = true;
+                }
             }
             qemu_progress_print(local_progress, 100);
         }
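
The (sub)cluster alignment step in the hunk above is easier to follow with concrete numbers. Below is a self-contained sketch of the same arithmetic with made-up values; the local ALIGN_DOWN/ALIGN_UP macros stand in for QEMU_ALIGN_DOWN/QEMU_ALIGN_UP so it compiles outside the tree.

/*
 * Worked example of the region extension done in img_rebase(): the
 * unallocated range is widened to write_align boundaries so that
 * blk_pwrite() does not trigger copy-on-write for partial (sub)clusters.
 * Values are illustrative; the macros mimic QEMU_ALIGN_DOWN/QEMU_ALIGN_UP.
 */
#include <inttypes.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a) ((x) / (a) * (a))
#define ALIGN_UP(x, a)   ALIGN_DOWN((x) + (a) - 1, (a))

int main(void)
{
    int64_t offset = 0x11000;      /* start of the differing region */
    int64_t n = 0x3000;            /* its length */
    int64_t write_align = 0x10000; /* overlay (sub)cluster size */
    int64_t size = 0x40000;        /* image size caps the extension */

    /* Grow the region leftwards to the previous aligned boundary... */
    n += offset - ALIGN_DOWN(offset, write_align);
    offset = ALIGN_DOWN(offset, write_align);
    /* ...and rightwards to the next aligned boundary, capped at EOF. */
    n += ALIGN_UP(offset + n, write_align) - (offset + n);
    if (n > size - offset) {
        n = size - offset;
    }

    /* Prints: aligned region: [0x10000, 0x20000) */
    printf("aligned region: [0x%" PRIx64 ", 0x%" PRIx64 ")\n",
           (uint64_t)offset, (uint64_t)(offset + n));
    return 0;
}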

+ 117 - 0
tests/qemu-iotests/024

@@ -199,6 +199,123 @@ echo
 # $BASE_OLD and $BASE_NEW)
 $QEMU_IMG map "$OVERLAY" | _filter_qemu_img_map
 
+# Check that rebase within the chain is working when
+# overlay_size > old_backing_size
+#
+# base_new <-- base_old <-- overlay
+#
+# Backing (new): 11 11 11 11 11
+# Backing (old): 22 22 22 22
+# Overlay:       -- -- -- -- --
+#
+# As a result, overlay should contain data identical to base_old, with the
+# last cluster remaining unallocated.
+
+echo
+echo "=== Test rebase within one backing chain ==="
+echo
+
+echo "Creating backing chain"
+echo
+
+TEST_IMG=$BASE_NEW _make_test_img $(( CLUSTER_SIZE * 5 ))
+TEST_IMG=$BASE_OLD _make_test_img -b "$BASE_NEW" -F $IMGFMT \
+    $(( CLUSTER_SIZE * 4 ))
+TEST_IMG=$OVERLAY _make_test_img -b "$BASE_OLD" -F $IMGFMT \
+    $(( CLUSTER_SIZE * 5 ))
+
+echo
+echo "Fill backing files with data"
+echo
+
+$QEMU_IO "$BASE_NEW" -c "write -P 0x11 0 $(( CLUSTER_SIZE * 5 ))" \
+    | _filter_qemu_io
+$QEMU_IO "$BASE_OLD" -c "write -P 0x22 0 $(( CLUSTER_SIZE * 4 ))" \
+    | _filter_qemu_io
+
+echo
+echo "Check the last cluster is zeroed in overlay before the rebase"
+echo
+$QEMU_IO "$OVERLAY" -c "read -P 0x00 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
+    | _filter_qemu_io
+
+echo
+echo "Rebase onto another image in the same chain"
+echo
+
+$QEMU_IMG rebase -b "$BASE_NEW" -F $IMGFMT "$OVERLAY"
+
+echo "Verify that data is read the same before and after rebase"
+echo
+
+# Verify the first 4 clusters are still read the same as in the old base
+$QEMU_IO "$OVERLAY" -c "read -P 0x22 0 $(( CLUSTER_SIZE * 4 ))" \
+    | _filter_qemu_io
+# Verify the last cluster still reads as zeroes
+$QEMU_IO "$OVERLAY" -c "read -P 0x00 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
+    | _filter_qemu_io
+
+echo
+
+# Check that rebase within the chain is working when
+# overlay cluster size > backings cluster size
+# (here overlay cluster size == 2 * backings cluster size)
+#
+# base_new <-- base_old <-- overlay
+#
+# Backing (new): -- -- -- -- -- --
+# Backing (old): -- 11 -- -- 22 --
+# Overlay:      |-- --|-- --|-- --|
+#
+# We should end up having 1st and 3rd cluster allocated, and their halves
+# being read as zeroes.
+
+echo
+echo "=== Test rebase with different cluster sizes ==="
+echo
+
+echo "Creating backing chain"
+echo
+
+TEST_IMG=$BASE_NEW _make_test_img $(( CLUSTER_SIZE * 6 ))
+TEST_IMG=$BASE_OLD _make_test_img -b "$BASE_NEW" -F $IMGFMT \
+    $(( CLUSTER_SIZE * 6 ))
+CLUSTER_SIZE=$(( CLUSTER_SIZE * 2 )) TEST_IMG=$OVERLAY \
+    _make_test_img -b "$BASE_OLD" -F $IMGFMT $(( CLUSTER_SIZE * 6 ))
+
+TEST_IMG=$OVERLAY _img_info
+
+echo
+echo "Fill backing files with data"
+echo
+
+$QEMU_IO "$BASE_OLD" -c "write -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" \
+    -c "write -P 0x22 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
+    | _filter_qemu_io
+
+echo
+echo "Rebase onto another image in the same chain"
+echo
+
+$QEMU_IMG rebase -b "$BASE_NEW" -F $IMGFMT "$OVERLAY"
+
+echo "Verify that data is read the same before and after rebase"
+echo
+
+$QEMU_IO "$OVERLAY" -c "read -P 0x00 0 $CLUSTER_SIZE" \
+    -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" \
+    -c "read -P 0x00 $(( CLUSTER_SIZE * 2 )) $(( CLUSTER_SIZE * 2 ))" \
+    -c "read -P 0x22 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
+    -c "read -P 0x00 $(( CLUSTER_SIZE * 5 )) $CLUSTER_SIZE" \
+    | _filter_qemu_io
+
+echo
+echo "Verify that untouched cluster remains unallocated"
+echo
+
+$QEMU_IMG map "$OVERLAY" | _filter_qemu_img_map
+
+echo
 
 # success, all done
 echo "*** done"

+ 73 - 0
tests/qemu-iotests/024.out

@@ -171,4 +171,77 @@ read 65536/65536 bytes at offset 196608
 Offset          Length          File
 0               0x30000         TEST_DIR/subdir/t.IMGFMT
 0x30000         0x10000         TEST_DIR/subdir/t.IMGFMT.base_new
+
+=== Test rebase within one backing chain ===
+
+Creating backing chain
+
+Formatting 'TEST_DIR/subdir/t.IMGFMT.base_new', fmt=IMGFMT size=327680
+Formatting 'TEST_DIR/subdir/t.IMGFMT.base_old', fmt=IMGFMT size=262144 backing_file=TEST_DIR/subdir/t.IMGFMT.base_new backing_fmt=IMGFMT
+Formatting 'TEST_DIR/subdir/t.IMGFMT', fmt=IMGFMT size=327680 backing_file=TEST_DIR/subdir/t.IMGFMT.base_old backing_fmt=IMGFMT
+
+Fill backing files with data
+
+wrote 327680/327680 bytes at offset 0
+320 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 262144/262144 bytes at offset 0
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Check the last cluster is zeroed in overlay before the rebase
+
+read 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Rebase onto another image in the same chain
+
+Verify that data is read the same before and after rebase
+
+read 262144/262144 bytes at offset 0
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
+=== Test rebase with different cluster sizes ===
+
+Creating backing chain
+
+Formatting 'TEST_DIR/subdir/t.IMGFMT.base_new', fmt=IMGFMT size=393216
+Formatting 'TEST_DIR/subdir/t.IMGFMT.base_old', fmt=IMGFMT size=393216 backing_file=TEST_DIR/subdir/t.IMGFMT.base_new backing_fmt=IMGFMT
+Formatting 'TEST_DIR/subdir/t.IMGFMT', fmt=IMGFMT size=393216 backing_file=TEST_DIR/subdir/t.IMGFMT.base_old backing_fmt=IMGFMT
+image: TEST_DIR/subdir/t.IMGFMT
+file format: IMGFMT
+virtual size: 384 KiB (393216 bytes)
+cluster_size: 131072
+backing file: TEST_DIR/subdir/t.IMGFMT.base_old
+backing file format: IMGFMT
+
+Fill backing files with data
+
+wrote 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Rebase onto another image in the same chain
+
+Verify that data is read the same before and after rebase
+
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 131072
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 327680
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Verify that untouched cluster remains unallocated
+
+Offset          Length          File
+0               0x20000         TEST_DIR/subdir/t.IMGFMT
+0x40000         0x20000         TEST_DIR/subdir/t.IMGFMT
+
 *** done

+ 12 - 12
tests/qemu-iotests/109.out

@@ -38,7 +38,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -90,7 +90,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 197120, "offset": 197120, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 197120, "offset": 197120, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -142,7 +142,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -194,7 +194,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -246,7 +246,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 65536, "offset": 65536, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 65536, "offset": 65536, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -298,7 +298,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -349,7 +349,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -400,7 +400,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 31457280, "offset": 31457280, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 31457280, "offset": 31457280, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -451,7 +451,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -502,7 +502,7 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2048, "offset": 2048, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2048, "offset": 2048, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -533,7 +533,7 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
@@ -557,7 +557,7 @@ Images are identical.
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}}
 {"execute":"query-block-jobs"}
-{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror"}]}
+{"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]}
 {"execute":"quit"}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}

+ 4 - 2
tests/qemu-iotests/118

@@ -277,7 +277,8 @@ class TestInitiallyFilled(GeneralChangeTestsBaseClass):
                                    'file.driver=file',
                                    'file.filename=%s' % old_img ])
         if self.interface == 'scsi':
-            self.vm.add_device('virtio-scsi-pci')
+            self.vm.add_object('iothread,id=iothread0')
+            self.vm.add_device('virtio-scsi-pci,iothread=iothread0')
         self.vm.add_device('%s,drive=drive0,id=%s' %
                            (interface_to_device_name(self.interface),
                             self.device_name))
@@ -312,7 +313,8 @@ class TestInitiallyEmpty(GeneralChangeTestsBaseClass):
         if self.use_drive:
             self.vm.add_drive(None, 'media=%s' % self.media, 'none')
         if self.interface == 'scsi':
-            self.vm.add_device('virtio-scsi-pci')
+            self.vm.add_object('iothread,id=iothread0')
+            self.vm.add_device('virtio-scsi-pci,iothread=iothread0')
         self.vm.add_device('%s,%sid=%s' %
                            (interface_to_device_name(self.interface),
                             'drive=drive0,' if self.use_drive else '',

+ 131 - 0
tests/qemu-iotests/271

@@ -899,6 +899,137 @@ _concurrent_io     | $QEMU_IO | _filter_qemu_io | \
     sed -e 's/\(20480\|40960\)/OFFSET/'
 _concurrent_verify | $QEMU_IO | _filter_qemu_io
 
+############################################################
+############################################################
+############################################################
+
+echo
+echo "### Rebase of qcow2 images with subclusters ###"
+echo
+
+l2_offset=$((0x400000))
+
+# Check that the rebase operation preserves holes between allocated subclusters
+# within one cluster (i.e. does not allocate extra space).  Check that the
+# data is preserved as well.
+#
+# Base (new backing): -- -- -- ... -- -- --
+# Mid (old backing):  -- 11 -- ... -- 22 --
+# Top:                -- -- -- ... -- -- --
+
+echo "### Preservation of unallocated holes after rebase ###"
+echo
+
+echo "# create backing chain"
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img -o cluster_size=1M,extended_l2=on 1M
+TEST_IMG="$TEST_IMG.mid" _make_test_img -o cluster_size=1M,extended_l2=on \
+    -b "$TEST_IMG.base" -F qcow2 1M
+TEST_IMG="$TEST_IMG.top" _make_test_img -o cluster_size=1M,extended_l2=on \
+    -b "$TEST_IMG.mid" -F qcow2 1M
+
+echo
+echo "# fill old backing with data (separate subclusters within cluster)"
+echo
+
+$QEMU_IO -c "write -P 0x11 32k 32k" \
+         -c "write -P 0x22 $(( 30 * 32 ))k 32k" \
+         "$TEST_IMG.mid" | _filter_qemu_io
+
+echo
+echo "# rebase topmost image onto the new backing"
+echo
+
+$QEMU_IMG rebase -b "$TEST_IMG.base" -F qcow2 "$TEST_IMG.top"
+
+echo "# verify that data is read the same before and after rebase"
+echo
+
+$QEMU_IO -c "read -P 0x00 0 32k" \
+         -c "read -P 0x11 32k 32k" \
+         -c "read -P 0x00 64k $(( 28 * 32 ))k" \
+         -c "read -P 0x22 $(( 30 * 32 ))k 32k" \
+         -c "read -P 0x00 $(( 31 * 32 ))k 32k" \
+         "$TEST_IMG.top" | _filter_qemu_io
+
+echo
+echo "# verify that only selected subclusters remain allocated"
+echo
+
+$QEMU_IMG map "$TEST_IMG.top" | _filter_testdir
+
+echo
+echo "# verify image bitmap"
+echo
+
+TEST_IMG="$TEST_IMG.top" alloc="1 30" zero="" _verify_l2_bitmap 0
+
+# Check that rebase with compression works correctly with images containing
+# subclusters.  When compression is enabled and we allocate a new
+# subcluster within the target (overlay) image, we expect the entire cluster
+# containing that subcluster to become compressed.
+#
+# Here we expect the 1st and 3rd clusters of the top (overlay) image to become
+# compressed after the rebase, while the 2nd cluster remains unallocated and
+# is read from the base (new backing) image.
+#
+# Base (new backing): |-- -- .. -- --|11 11 .. 11 11|-- -- .. -- --|
+# Mid (old backing):  |-- -- .. -- 22|-- -- .. -- --|33 -- .. -- --|
+# Top:                |-- -- .. -- --|-- -- -- -- --|-- -- .. -- --|
+
+echo
+echo "### Rebase with compression for images with subclusters ###"
+echo
+
+echo "# create backing chain"
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img -o cluster_size=1M,extended_l2=on 3M
+TEST_IMG="$TEST_IMG.mid" _make_test_img -o cluster_size=1M,extended_l2=on \
+    -b "$TEST_IMG.base" -F qcow2 3M
+TEST_IMG="$TEST_IMG.top" _make_test_img -o cluster_size=1M,extended_l2=on \
+    -b "$TEST_IMG.mid" -F qcow2 3M
+
+echo
+echo "# fill old and new backing with data"
+echo
+
+$QEMU_IO -c "write -P 0x11 1M 1M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x22 $(( 31 * 32 ))k 32k" \
+         -c "write -P 0x33 $(( 64 * 32 ))k 32k" \
+         "$TEST_IMG.mid" | _filter_qemu_io
+
+echo
+echo "# rebase topmost image onto the new backing, with compression"
+echo
+
+$QEMU_IMG rebase -c -b "$TEST_IMG.base" -F qcow2 "$TEST_IMG.top"
+
+echo "# verify that the 1st and 3rd clusters've become compressed"
+echo
+
+$QEMU_IMG map --output=json "$TEST_IMG.top" | _filter_testdir
+
+echo
+echo "# verify that data is read the same before and after rebase"
+echo
+
+$QEMU_IO -c "read -P 0x22 $(( 31 * 32 ))k 32k" \
+         -c "read -P 0x11 1M 1M" \
+         -c "read -P 0x33 $(( 64 * 32 ))k 32k" \
+         "$TEST_IMG.top" | _filter_qemu_io
+
+echo
+echo "# verify image bitmap"
+echo
+
+# For compressed clusters the bitmap is always 0.  For an unallocated cluster
+# there should be no entry at all, thus the bitmap is also 0.
+TEST_IMG="$TEST_IMG.top" alloc="" zero="" _verify_l2_bitmap 0
+TEST_IMG="$TEST_IMG.top" alloc="" zero="" _verify_l2_bitmap 1
+TEST_IMG="$TEST_IMG.top" alloc="" zero="" _verify_l2_bitmap 2
+
 # success, all done
 echo "*** done"
 rm -f $seq.full

+ 82 - 0
tests/qemu-iotests/271.out

@@ -723,4 +723,86 @@ wrote 2048/2048 bytes at offset OFFSET
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 2048/2048 bytes at offset OFFSET
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+### Rebase of qcow2 images with subclusters ###
+
+### Preservation of unallocated holes after rebase ###
+
+# create backing chain
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1048576
+Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT.top', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.mid backing_fmt=IMGFMT
+
+# fill old backing with data (separate subclusters within cluster)
+
+wrote 32768/32768 bytes at offset 32768
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 32768/32768 bytes at offset 983040
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+# rebase topmost image onto the new backing
+
+# verify that data is read the same before and after rebase
+
+read 32768/32768 bytes at offset 0
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32768/32768 bytes at offset 32768
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 917504/917504 bytes at offset 65536
+896 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32768/32768 bytes at offset 983040
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32768/32768 bytes at offset 1015808
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+# verify that only selected subclusters remain allocated
+
+Offset          Length          Mapped to       File
+0x8000          0x8000          0x508000        TEST_DIR/t.qcow2.top
+0xf0000         0x8000          0x5f0000        TEST_DIR/t.qcow2.top
+
+# verify image bitmap
+
+L2 entry #0: 0x8000000000500000 0000000040000002
+
+### Rebase with compression for images with subclusters ###
+
+# create backing chain
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=3145728
+Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=3145728 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT.top', fmt=IMGFMT size=3145728 backing_file=TEST_DIR/t.IMGFMT.mid backing_fmt=IMGFMT
+
+# fill old and new backing with data
+
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 32768/32768 bytes at offset 1015808
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 32768/32768 bytes at offset 2097152
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+# rebase topmost image onto the new backing, with compression
+
+# verify that the 1st and 3rd clusters've become compressed
+
+[{ "start": 0, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true},
+{ "start": 1048576, "length": 1048576, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": 5242880},
+{ "start": 2097152, "length": 1048576, "depth": 0, "present": true, "zero": false, "data": true, "compressed": true}]
+
+# verify that data is read the same before and after rebase
+
+read 32768/32768 bytes at offset 1015808
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32768/32768 bytes at offset 2097152
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+# verify image bitmap
+
+L2 entry #0: 0x4008000000500000 0000000000000000
+L2 entry #1: 0x0000000000000000 0000000000000000
+L2 entry #2: 0x400800000050040b 0000000000000000
 *** done

+ 165 - 0
tests/qemu-iotests/314

@@ -0,0 +1,165 @@
+#!/usr/bin/env bash
+# group: rw backing auto quick
+#
+# Test qemu-img rebase with compression
+#
+# Copyright (c) 2023 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=andrey.drobyshev@virtuozzo.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+    _rm_test_img "$TEST_IMG.base"
+    _rm_test_img "$TEST_IMG.itmd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# Want the size divisible by 2 and 3
+size=$(( 48 * 1024 * 1024 ))
+half_size=$(( size / 2 ))
+third_size=$(( size / 3 ))
+
+# 1. "qemu-img rebase -c" should refuse working with any format which doesn't
+# support compression.  We only check "-f raw" here.
+echo
+echo "=== Testing compressed rebase format compatibility ==="
+echo
+
+$QEMU_IMG create -f raw "$TEST_IMG" "$size" | _filter_img_create
+$QEMU_IMG rebase -c -f raw -b "" "$TEST_IMG"
+
+# 2. Write the 1st half of $size to the backing file (compressed) and the 2nd
+# half to the top image (also compressed).  Rebase the top image onto no
+# backing file, with compression (i.e. "qemu-img rebase -c -b ''").  Check
+# that the resulting image
+# has the written data preserved, and "qemu-img check" reports 100% clusters
+# as compressed.
+echo
+echo "=== Testing rebase with compression onto no backing file ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img $size
+_make_test_img -b "$TEST_IMG.base" -F $IMGFMT $size
+
+$QEMU_IO -c "write -c -P 0xaa 0 $half_size" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -c -P 0xbb $half_size $half_size" "$TEST_IMG" \
+    | _filter_qemu_io
+
+$QEMU_IMG rebase -c -f $IMGFMT -b "" "$TEST_IMG"
+
+$QEMU_IO -c "read -P 0xaa 0 $half_size" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xbb $half_size $half_size" "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG check "$TEST_IMG" | _filter_testdir
+
+# 3. Same as the previous one, but with a raw backing file (hence the write to
+# the backing is uncompressed).
+echo
+echo "=== Testing rebase with compression with raw backing file ==="
+echo
+
+$QEMU_IMG create -f raw "$TEST_IMG.base" "$half_size" | _filter_img_create
+_make_test_img -b "$TEST_IMG.base" -F raw $size
+
+$QEMU_IO -f raw -c "write -P 0xaa 0 $half_size" "$TEST_IMG.base" \
+    | _filter_qemu_io
+$QEMU_IO -c "write -c -P 0xbb $half_size $half_size" \
+    "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG rebase -c -f $IMGFMT -b "" "$TEST_IMG"
+
+$QEMU_IO -c "read -P 0xaa 0 $half_size" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xbb $half_size $half_size" "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG check "$TEST_IMG" | _filter_testdir
+
+# 4. Create a backing chain base<--itmd<--img, filling the 1st, 2nd and 3rd
+# thirds of them, respectively (with compression).  Rebase img onto base,
+# effectively deleting itmd from the chain, and check that written data is
+# preserved in the resulting image.  Also check that "qemu-img check" reports
+# 100% clusters as compressed.
+echo
+echo "=== Testing compressed rebase removing single delta from the chain ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img $size
+TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT $size
+_make_test_img -b "$TEST_IMG.itmd" -F $IMGFMT $size
+
+$QEMU_IO -c "write -c -P 0xaa 0 $third_size" \
+    "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -c -P 0xbb $third_size $third_size" \
+    "$TEST_IMG.itmd" | _filter_qemu_io
+$QEMU_IO -c "write -c -P 0xcc $((third_size * 2 )) $third_size" \
+    "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG rebase -c -f $IMGFMT -b "$TEST_IMG.base" -F $IMGFMT "$TEST_IMG"
+
+$QEMU_IO -c "read -P 0xaa 0 $third_size" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xbb $third_size $third_size" \
+    "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xcc $(( third_size * 2 )) $third_size" \
+    "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG check "$TEST_IMG" | _filter_testdir
+
+# 5. Create one-cluster backing and overlay images, and fill only the first
+# (half - 1) bytes of the backing with data (uncompressed).  Rebase the
+# overlay onto no backing file with compression.  Check that data is still
+# read correctly, and that the cluster is now really compressed ("qemu-img
+# check" reports 100% clusters as compressed).
+echo
+echo "=== Testing compressed rebase with unaligned unmerged data ==="
+echo
+
+CLUSTER_SIZE=65536
+
+TEST_IMG="$TEST_IMG.base" _make_test_img $CLUSTER_SIZE
+_make_test_img -b "$TEST_IMG.base" -F $IMGFMT $CLUSTER_SIZE
+
+$QEMU_IO -c "write -P 0xaa 0 $(( CLUSTER_SIZE / 2 - 1 ))" $TEST_IMG.base \
+    | _filter_qemu_io
+
+$QEMU_IMG rebase -c -f $IMGFMT -b "" "$TEST_IMG"
+
+$QEMU_IO -c "read -P 0xaa 0 $(( CLUSTER_SIZE / 2 - 1 ))" "$TEST_IMG" \
+    | _filter_qemu_io
+$QEMU_IO -c \
+    "read -P 0x00 $(( CLUSTER_SIZE / 2 - 1 )) $(( CLUSTER_SIZE / 2 + 1 ))" \
+    "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG check "$TEST_IMG" | _filter_testdir
+
+# success, all done
+echo
+echo '*** done'
+rm -f $seq.full
+status=0

+ 75 - 0
tests/qemu-iotests/314.out

@@ -0,0 +1,75 @@
+QA output created by 314
+
+=== Testing compressed rebase format compatibility ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=raw size=50331648
+qemu-img: Compression not supported for this file format
+
+=== Testing rebase with compression onto no backing file ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=50331648
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=50331648 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+wrote 25165824/25165824 bytes at offset 0
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 25165824/25165824 bytes at offset 25165824
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 25165824/25165824 bytes at offset 0
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 25165824/25165824 bytes at offset 25165824
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+768/768 = 100.00% allocated, 100.00% fragmented, 100.00% compressed clusters
+Image end offset: 458752
+
+=== Testing rebase with compression with raw backing file ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=raw size=25165824
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=50331648 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=raw
+wrote 25165824/25165824 bytes at offset 0
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 25165824/25165824 bytes at offset 25165824
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 25165824/25165824 bytes at offset 0
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 25165824/25165824 bytes at offset 25165824
+24 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+768/768 = 100.00% allocated, 100.00% fragmented, 100.00% compressed clusters
+Image end offset: 458752
+
+=== Testing compressed rebase removing single delta from the chain ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=50331648
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=50331648 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=50331648 backing_file=TEST_DIR/t.IMGFMT.itmd backing_fmt=IMGFMT
+wrote 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 16777216/16777216 bytes at offset 16777216
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 16777216/16777216 bytes at offset 33554432
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 16777216/16777216 bytes at offset 0
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 16777216/16777216 bytes at offset 16777216
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 16777216/16777216 bytes at offset 33554432
+16 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+512/768 = 66.67% allocated, 100.00% fragmented, 100.00% compressed clusters
+Image end offset: 458752
+
+=== Testing compressed rebase with unaligned unmerged data ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=65536
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+wrote 32767/32767 bytes at offset 0
+31.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32767/32767 bytes at offset 0
+31.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 32769/32769 bytes at offset 32767
+32.001 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+1/1 = 100.00% allocated, 100.00% fragmented, 100.00% compressed clusters
+Image end offset: 393216
+
+*** done

+ 193 - 0
tests/qemu-iotests/tests/mirror-change-copy-mode

@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+# group: rw
+#
+# Test for changing mirror copy mode from background to active
+#
+# Copyright (C) 2023 Proxmox Server Solutions GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import time
+
+import iotests
+from iotests import qemu_img, QemuStorageDaemon
+
+iops_target = 8
+iops_source = iops_target * 2
+image_size = 1 * 1024 * 1024
+source_img = os.path.join(iotests.test_dir, 'source.' + iotests.imgfmt)
+target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt)
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+
+class TestMirrorChangeCopyMode(iotests.QMPTestCase):
+
+    def setUp(self):
+        qemu_img('create', '-f', iotests.imgfmt, source_img, str(image_size))
+        qemu_img('create', '-f', iotests.imgfmt, target_img, str(image_size))
+
+        self.qsd = QemuStorageDaemon('--nbd-server',
+                                     f'addr.type=unix,addr.path={nbd_sock}',
+                                     qmp=True)
+
+        self.qsd.cmd('object-add', {
+            'qom-type': 'throttle-group',
+            'id': 'thrgr-target',
+            'limits': {
+                'iops-write': iops_target,
+                'iops-write-max': iops_target
+            }
+        })
+
+        self.qsd.cmd('blockdev-add', {
+            'node-name': 'target',
+            'driver': 'throttle',
+            'throttle-group': 'thrgr-target',
+            'file': {
+                'driver': iotests.imgfmt,
+                'file': {
+                    'driver': 'file',
+                    'filename': target_img
+                }
+            }
+        })
+
+        self.qsd.cmd('block-export-add', {
+            'id': 'exp0',
+            'type': 'nbd',
+            'node-name': 'target',
+            'writable': True
+        })
+
+        self.vm = iotests.VM()
+        self.vm.add_args('-drive',
+                         f'file={source_img},if=none,format={iotests.imgfmt},'
+                         f'iops_wr={iops_source},'
+                         f'iops_wr_max={iops_source},'
+                         'id=source')
+        self.vm.launch()
+
+        self.vm.cmd('blockdev-add', {
+            'node-name': 'target',
+            'driver': 'nbd',
+            'export': 'target',
+            'server': {
+                'type': 'unix',
+                'path': nbd_sock
+            }
+        })
+
+
+    def tearDown(self):
+        self.vm.shutdown()
+        self.qsd.stop()
+        self.check_qemu_io_errors()
+        self.check_images_identical()
+        os.remove(source_img)
+        os.remove(target_img)
+
+    # Once the VM is shut down we can parse the log and see if qemu-io ran
+    # without errors.
+    def check_qemu_io_errors(self):
+        self.assertFalse(self.vm.is_running())
+        log = self.vm.get_log()
+        for line in log.split("\n"):
+            assert not line.startswith("Pattern verification failed")
+
+    def check_images_identical(self):
+        qemu_img('compare', '-f', iotests.imgfmt, source_img, target_img)
+
+    def start_mirror(self):
+        self.vm.cmd('blockdev-mirror',
+                    job_id='mirror',
+                    device='source',
+                    target='target',
+                    filter_node_name='mirror-top',
+                    sync='full',
+                    copy_mode='background')
+
+    def test_background_to_active(self):
+        self.vm.hmp_qemu_io('source', f'write 0 {image_size}')
+        self.vm.hmp_qemu_io('target', f'write 0 {image_size}')
+
+        self.start_mirror()
+
+        result = self.vm.cmd('query-block-jobs')
+        assert not result[0]['actively-synced']
+
+        self.vm.event_wait('BLOCK_JOB_READY')
+
+        result = self.vm.cmd('query-block-jobs')
+        assert not result[0]['actively-synced']
+
+        # Start some background requests.
+        reqs = 4 * iops_source
+        req_size = image_size // reqs
+        for i in range(0, reqs):
+            req = f'aio_write -P 7 {req_size * i} {req_size}'
+            self.vm.hmp_qemu_io('source', req)
+
+        # Wait for the first few requests.
+        time.sleep(1)
+        self.vm.qtest(f'clock_step {1 * 1000 * 1000 * 1000}')
+
+        result = self.vm.cmd('query-block-jobs')
+        # There should've been new requests.
+        assert result[0]['len'] > image_size
+        # To verify later that not all requests were completed at this point.
+        len_before_change = result[0]['len']
+
+        # Change the copy mode while requests are happening.
+        self.vm.cmd('block-job-change',
+                    id='mirror',
+                    type='mirror',
+                    copy_mode='write-blocking')
+
+        # Wait until image is actively synced.
+        while True:
+            time.sleep(0.1)
+            self.vm.qtest(f'clock_step {100 * 1000 * 1000}')
+            result = self.vm.cmd('query-block-jobs')
+            if result[0]['actively-synced']:
+                break
+
+        # Because of throttling, not all requests should have been completed
+        # above.
+        result = self.vm.cmd('query-block-jobs')
+        assert result[0]['len'] > len_before_change
+
+        # Issue enough requests for a few seconds only touching the first half
+        # of the image.
+        reqs = 4 * iops_target
+        req_size = image_size // 2 // reqs
+        for i in range(0, reqs):
+            req = f'aio_write -P 19 {req_size * i} {req_size}'
+            self.vm.hmp_qemu_io('source', req)
+
+        # Now issue a synchronous write in the second half of the image and
+        # immediately verify that it was written to the target too. This would
+        # fail without switching the copy mode. Note that this only produces a
+        # log line and the actual checking happens during tearDown().
+        req_args = f'-P 37 {3 * (image_size // 4)} {req_size}'
+        self.vm.hmp_qemu_io('source', f'write {req_args}')
+        self.vm.hmp_qemu_io('target', f'read {req_args}')
+
+        self.vm.cmd('block-job-cancel', device='mirror')
+        while len(self.vm.cmd('query-block-jobs')) > 0:
+            time.sleep(0.1)
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2', 'raw'],
+                 supported_protocols=['file'])

+ 5 - 0
tests/qemu-iotests/tests/mirror-change-copy-mode.out

@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK

+ 156 - 0
util/defer-call.c

@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Deferred calls
+ *
+ * Copyright Red Hat.
+ *
+ * This API defers a function call within a defer_call_begin()/defer_call_end()
+ * section, allowing multiple calls to batch up. This is a performance
+ * optimization that is used in the block layer to submit several I/O requests
+ * at once instead of individually:
+ *
+ *   defer_call_begin(); <-- start of section
+ *   ...
+ *   defer_call(my_func, my_obj); <-- deferred my_func(my_obj) call
+ *   defer_call(my_func, my_obj); <-- another
+ *   defer_call(my_func, my_obj); <-- another
+ *   ...
+ *   defer_call_end(); <-- end of section, my_func(my_obj) is called once
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine-tls.h"
+#include "qemu/notify.h"
+#include "qemu/thread.h"
+#include "qemu/defer-call.h"
+
+/* A function call that has been deferred until defer_call_end() */
+typedef struct {
+    void (*fn)(void *);
+    void *opaque;
+} DeferredCall;
+
+/* Per-thread state */
+typedef struct {
+    unsigned nesting_level;
+    GArray *deferred_call_array;
+} DeferCallThreadState;
+
+/* Use get_ptr_defer_call_thread_state() to fetch this thread-local value */
+QEMU_DEFINE_STATIC_CO_TLS(DeferCallThreadState, defer_call_thread_state);
+
+/* Called at thread cleanup time */
+static void defer_call_atexit(Notifier *n, void *value)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+    g_array_free(thread_state->deferred_call_array, TRUE);
+}
+
+/* This won't involve coroutines, so use __thread */
+static __thread Notifier defer_call_atexit_notifier;
+
+/**
+ * defer_call:
+ * @fn: a function pointer to be invoked
+ * @opaque: a user-defined argument to @fn()
+ *
+ * Call @fn(@opaque) immediately if not within a
+ * defer_call_begin()/defer_call_end() section.
+ *
+ * Otherwise defer the call until the end of the outermost
+ * defer_call_begin()/defer_call_end() section in this thread. If the same
+ * @fn/@opaque pair has already been deferred, it will only be called once upon
+ * defer_call_end() so that accumulated calls are batched into a single call.
+ *
+ * The caller must ensure that @opaque is not freed before @fn() is invoked.
+ */
+void defer_call(void (*fn)(void *), void *opaque)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+
+    /* Call immediately if we're not deferring calls */
+    if (thread_state->nesting_level == 0) {
+        fn(opaque);
+        return;
+    }
+
+    GArray *array = thread_state->deferred_call_array;
+    if (!array) {
+        array = g_array_new(FALSE, FALSE, sizeof(DeferredCall));
+        thread_state->deferred_call_array = array;
+        defer_call_atexit_notifier.notify = defer_call_atexit;
+        qemu_thread_atexit_add(&defer_call_atexit_notifier);
+    }
+
+    DeferredCall *fns = (DeferredCall *)array->data;
+    DeferredCall new_fn = {
+        .fn = fn,
+        .opaque = opaque,
+    };
+
+    /*
+     * There won't be many, so do a linear search. If this becomes a bottleneck
+     * then a binary search (glib 2.62+) or different data structure could be
+     * used.
+     */
+    for (guint i = 0; i < array->len; i++) {
+        if (memcmp(&fns[i], &new_fn, sizeof(new_fn)) == 0) {
+            return; /* already exists */
+        }
+    }
+
+    g_array_append_val(array, new_fn);
+}
+
+/**
+ * defer_call_begin: Defer defer_call() functions until defer_call_end()
+ *
+ * defer_call_begin() and defer_call_end() are thread-local operations. The
+ * caller must ensure that each defer_call_begin() has a matching
+ * defer_call_end() in the same thread.
+ *
+ * Nesting is supported. defer_call() functions are only called at the
+ * outermost defer_call_end().
+ */
+void defer_call_begin(void)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+
+    assert(thread_state->nesting_level < UINT32_MAX);
+
+    thread_state->nesting_level++;
+}
+
+/**
+ * defer_call_end: Run any pending defer_call() functions
+ *
+ * There must have been a matching defer_call_begin() call in the same thread
+ * prior to this defer_call_end() call.
+ */
+void defer_call_end(void)
+{
+    DeferCallThreadState *thread_state = get_ptr_defer_call_thread_state();
+
+    assert(thread_state->nesting_level > 0);
+
+    if (--thread_state->nesting_level > 0) {
+        return;
+    }
+
+    GArray *array = thread_state->deferred_call_array;
+    if (!array) {
+        return;
+    }
+
+    DeferredCall *fns = (DeferredCall *)array->data;
+
+    for (guint i = 0; i < array->len; i++) {
+        fns[i].fn(fns[i].opaque);
+    }
+
+    /*
+     * This resets the array without freeing memory so that appending is cheap
+     * in the future.
+     */
+    g_array_set_size(array, 0);
+}

+ 1 - 0
util/meson.build

@@ -28,6 +28,7 @@ util_ss.add(when: 'CONFIG_WIN32', if_true: pathcch)
 if glib_has_gslice
   util_ss.add(files('qtree.c'))
 endif
+util_ss.add(files('defer-call.c'))
 util_ss.add(files('envlist.c', 'path.c', 'module.c'))
 util_ss.add(files('host-utils.c'))
 util_ss.add(files('bitmap.c', 'bitops.c'))

+ 5 - 0
util/thread-pool.c

@@ -15,6 +15,7 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include "qemu/osdep.h"
+#include "qemu/defer-call.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/coroutine.h"
@@ -175,6 +176,8 @@ static void thread_pool_completion_bh(void *opaque)
     ThreadPool *pool = opaque;
     ThreadPoolElement *elem, *next;
 
+    defer_call_begin(); /* cb() may use defer_call() to coalesce work */
+
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
         if (elem->state != THREAD_DONE) {
@@ -208,6 +211,8 @@ restart:
             qemu_aio_unref(elem);
         }
     }
+
+    defer_call_end();
 }
 
 static void thread_pool_cancel(BlockAIOCB *acb)
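
With the completion loop wrapped as above, any callback it invokes can defer its expensive follow-up work and have it run once per batch. A hedged sketch of such a callback (MyDev and notify_guest() are illustrative, not taken from this series):

/* Hypothetical completion callback coalescing notifications via defer_call(). */
#include "qemu/osdep.h"
#include "qemu/defer-call.h"

typedef struct MyDev MyDev;

static void notify_guest(void *opaque)
{
    /* raise one interrupt/eventfd for the whole batch of completions */
}

static void my_request_cb(void *opaque, int ret)
{
    MyDev *dev = opaque;

    /* record the completion of this request for dev ... */

    /*
     * Called from within thread_pool_completion_bh()'s
     * defer_call_begin()/defer_call_end() section, so several completed
     * requests trigger a single notify_guest(dev) at defer_call_end().
     */
    defer_call(notify_guest, dev);
}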