瀏覽代碼

Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging

Migration pull request

- Fix use-after-free in incoming migration
- Improve cpr migration blocker for volatile ram
- Fix RDMA migration

# -----BEGIN PGP SIGNATURE-----
#
# iQJEBAABCAAuFiEEqhtIsKIjJqWkw2TPx5jcdBvsMZ0FAmfPaCAQHGZhcm9zYXNA
# c3VzZS5kZQAKCRDHmNx0G+wxnQy9EADRp/6GaSzoqWgafU8DGM5Q69HyKiZ888DZ
# 7qXqJeH3c95nvOnIw2BMhUYX4t8kkAbUcWlr7L8KCjZT/6N/d1/Z5fimqymRkw4x
# +8kDyADv5FY0339aMLf3qBbIAQj/gvPvg8H+e+hXfokZqoYgLXZ0eqNAz8MjIcyN
# +A+waEBMLNvTgZyTQl2TbCvb+mbRial8u8C9BIoILhn/gNuoMX7lbt0tq41HZwe0
# l3v16jnXlsDvQUXp99bGySomRgkcYqdAt+HWHLje3frT/Ap8dGaUJKlpgJ8DXJiA
# fV1reKihJdj37q9GSG8cR02W+ATBesiecufV4TUPNQYQzTdxn3fOMwdc3Pck074D
# YAQxFT20OPou+NRxjYoHT/GqFUY36/2qBJpt7TY3ramdklHJhXpRyedK4rppTZNn
# pC3lnbpA/LHRmfD1Nh0CRmqZpbV+qW1BWEgMwk4qui46BxYWHxKHFpxAuwlJQmcw
# RxY8qPhIXQM03tiTgIddBNDZLoVqRoUP7YpzR7MMa1rz0T5inNFMcNGm72WpKODE
# rzpw4ezXO7+D4/QmMq3PoPfhFv3QFnH6jaGj8JkJM378KLvh4fQ0woXtDKFl4Tbq
# 1oBZ17WUv6aHr75b+KMyKJNLinvMu5WF5WoRYIt1lNXaqk7I494yvIjtRrimWZIS
# Z5Q0tpUmpw==
# =yEH0
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 11 Mar 2025 06:30:56 HKT
# gpg:                using RSA key AA1B48B0A22326A5A4C364CFC798DC741BEC319D
# gpg:                issuer "farosas@suse.de"
# gpg: Good signature from "Fabiano Rosas <farosas@suse.de>" [unknown]
# gpg:                 aka "Fabiano Almeida Rosas <fabiano.rosas@suse.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: AA1B 48B0 A223 26A5 A4C3  64CF C798 DC74 1BEC 319D

* tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu:
  migration: Prioritize RDMA in ram_save_target_page()
  migration: ram block cpr blockers
  migration: Fix UAF for incoming migration on MigrationState

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Stefan Hajnoczi 5 月之前
父節點
當前提交
825b96dbce
共有 6 個文件被更改,包括 115 次插入6 次删除
  1. 3 0
      include/exec/memory.h
  2. 1 0
      include/exec/ramblock.h
  3. 38 2
      migration/migration.c
  4. 5 4
      migration/ram.c
  5. 2 0
      migration/savevm.c
  6. 66 0
      system/physmem.c

+ 3 - 0
include/exec/memory.h

@@ -3203,6 +3203,9 @@ bool ram_block_discard_is_disabled(void);
  */
 bool ram_block_discard_is_required(void);
 
+void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp);
+void ram_block_del_cpr_blocker(RAMBlock *rb);
+
 #endif
 
 #endif

+ 1 - 0
include/exec/ramblock.h

@@ -39,6 +39,7 @@ struct RAMBlock {
     /* RCU-enabled, writes protected by the ramlist lock */
     QLIST_ENTRY(RAMBlock) next;
     QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
+    Error *cpr_blocker;
     int fd;
     uint64_t fd_offset;
     int guest_memfd;

+ 38 - 2
migration/migration.c

@@ -116,6 +116,27 @@ static void migration_downtime_start(MigrationState *s)
     s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 }
 
+/*
+ * This is unfortunate: incoming migration actually needs the outgoing
+ * migration state (MigrationState) to be there too, e.g. to query
+ * capabilities, parameters, using locks, setup errors, etc.
+ *
+ * NOTE: when calling this, making sure current_migration exists and not
+ * been freed yet!  Otherwise trying to access the refcount is already
+ * an use-after-free itself..
+ *
+ * TODO: Move shared part of incoming / outgoing out into separate object.
+ * Then this is not needed.
+ */
+static void migrate_incoming_ref_outgoing_state(void)
+{
+    object_ref(migrate_get_current());
+}
+static void migrate_incoming_unref_outgoing_state(void)
+{
+    object_unref(migrate_get_current());
+}
+
 static void migration_downtime_end(MigrationState *s)
 {
     int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
@@ -863,7 +884,7 @@ process_incoming_migration_co(void *opaque)
              * postcopy thread.
              */
             trace_process_incoming_migration_co_postcopy_end_main();
-            return;
+            goto out;
         }
         /* Else if something went wrong then just fall out of the normal exit */
     }
@@ -879,7 +900,8 @@ process_incoming_migration_co(void *opaque)
     }
 
     migration_bh_schedule(process_incoming_migration_bh, mis);
-    return;
+    goto out;
+
 fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
@@ -896,6 +918,9 @@ fail:
 
         exit(EXIT_FAILURE);
     }
+out:
+    /* Pairs with the refcount taken in qmp_migrate_incoming() */
+    migrate_incoming_unref_outgoing_state();
 }
 
 /**
@@ -1901,6 +1926,17 @@ void qmp_migrate_incoming(const char *uri, bool has_channels,
         return;
     }
 
+    /*
+     * Making sure MigrationState is available until incoming migration
+     * completes.
+     *
+     * NOTE: QEMU _might_ leak this refcount in some failure paths, but
+     * that's OK.  This is the minimum change we need to at least making
+     * sure success case is clean on the refcount.  We can try harder to
+     * make it accurate for any kind of failures, but it might be an
+     * overkill and doesn't bring us much benefit.
+     */
+    migrate_incoming_ref_outgoing_state();
     once = false;
 }
 

+ 5 - 4
migration/ram.c

@@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     int res;
 
+    /* Hand over to RDMA first */
+    if (control_save_page(pss, offset, &res)) {
+        return res;
+    }
+
     if (!migrate_multifd()
         || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
         if (save_zero_page(rs, pss, offset)) {
@@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
         return ram_save_multifd_page(block, offset);
     }
 
-    if (control_save_page(pss, offset, &res)) {
-        return res;
-    }
-
     return ram_save_page(rs, pss);
 }
 

+ 2 - 0
migration/savevm.c

@@ -3514,12 +3514,14 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
     qemu_ram_set_idstr(mr->ram_block,
                        memory_region_name(mr), dev);
     qemu_ram_set_migratable(mr->ram_block);
+    ram_block_add_cpr_blocker(mr->ram_block, &error_fatal);
 }
 
 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
 {
     qemu_ram_unset_idstr(mr->ram_block);
     qemu_ram_unset_migratable(mr->ram_block);
+    ram_block_del_cpr_blocker(mr->ram_block);
 }
 
 void vmstate_register_ram_global(MemoryRegion *mr)

+ 66 - 0
system/physmem.c

@@ -71,7 +71,10 @@
 
 #include "qemu/pmem.h"
 
+#include "qapi/qapi-types-migration.h"
+#include "migration/blocker.h"
 #include "migration/cpr.h"
+#include "migration/options.h"
 #include "migration/vmstate.h"
 
 #include "qemu/range.h"
@@ -1904,6 +1907,14 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
             qemu_mutex_unlock_ramlist();
             goto out_free;
         }
+
+        error_setg(&new_block->cpr_blocker,
+                   "Memory region %s uses guest_memfd, "
+                   "which is not supported with CPR.",
+                   memory_region_name(new_block->mr));
+        migrate_add_blocker_modes(&new_block->cpr_blocker, errp,
+                                  MIG_MODE_CPR_TRANSFER,
+                                  -1);
     }
 
     ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
@@ -4095,3 +4106,58 @@ bool ram_block_discard_is_required(void)
     return qatomic_read(&ram_block_discard_required_cnt) ||
            qatomic_read(&ram_block_coordinated_discard_required_cnt);
 }
+
+/*
+ * Return true if ram is compatible with CPR.  Do not exclude rom,
+ * because the rom file could change in new QEMU.
+ */
+static bool ram_is_cpr_compatible(RAMBlock *rb)
+{
+    MemoryRegion *mr = rb->mr;
+
+    if (!mr || !memory_region_is_ram(mr)) {
+        return true;
+    }
+
+    /* Ram device is remapped in new QEMU */
+    if (memory_region_is_ram_device(mr)) {
+        return true;
+    }
+
+    /*
+     * A file descriptor is passed to new QEMU and remapped, or its backing
+     * file is reopened and mapped.  It must be shared to avoid COW.
+     */
+    if (rb->fd >= 0 && qemu_ram_is_shared(rb)) {
+        return true;
+    }
+
+    return false;
+}
+
+/*
+ * Add a blocker for each volatile ram block.  This function should only be
+ * called after we know that the block is migratable.  Non-migratable blocks
+ * are either re-created in new QEMU, or are handled specially, or are covered
+ * by a device-level CPR blocker.
+ */
+void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp)
+{
+    assert(qemu_ram_is_migratable(rb));
+
+    if (ram_is_cpr_compatible(rb)) {
+        return;
+    }
+
+    error_setg(&rb->cpr_blocker,
+               "Memory region %s is not compatible with CPR. share=on is "
+               "required for memory-backend objects, and aux-ram-share=on is "
+               "required.", memory_region_name(rb->mr));
+    migrate_add_blocker_modes(&rb->cpr_blocker, errp, MIG_MODE_CPR_TRANSFER,
+                              -1);
+}
+
+void ram_block_del_cpr_blocker(RAMBlock *rb)
+{
+    migrate_del_blocker(&rb->cpr_blocker);
+}