瀏覽代碼

Merge tag 'migration-20250129-pull-request' of https://gitlab.com/farosas/qemu into staging

Migration pull request

- Purge of ram_save_target_page_legacy
- Cleanups to postcopy, json writer, migration states
- New migration mode cpr-transfer
- Fix for a -Werror=maybe-uninitialized instance in savevm

# -----BEGIN PGP SIGNATURE-----
#
# iQJEBAABCAAuFiEEqhtIsKIjJqWkw2TPx5jcdBvsMZ0FAmeaT8EQHGZhcm9zYXNA
# c3VzZS5kZQAKCRDHmNx0G+wxndXrEACTT+rdoEvOsNs4nM2a67GjxUoQZVTAWn+8
# lYhhNZLA4E+qHwpHTDCwyfyvCe615r72+bF7QO1KTrYeXGJg4SPk5kbEhCDqqjEu
# SGqrlPwkC1x3WkTvb228iDddDQ8dccko3Sy6wAyz0o8dtp5p4iK+57qzB/84u94L
# y3zQ+owOo9OLnXgdfMpN99HGQSvPR7CbP/2L293IrMCuPDUo9XhI7ARNS/phbT3Z
# aDl10WGHKz1SJWOkPj137E6+xMKuCmOZDTufTcTaHfyliD04JRWgEZVnKJxKJDxd
# 9e+lzHvXuYfO7YO11fr7DttPRnLEfjipELVTxrudM92eZ95XwdL4+ggfBTGHt76P
# yFUrp7G8qsUjWd+DHPmoo6Gx71zPbE6v9J2NMn2/1k4WdPOYy7HTmDgCkRirRTvV
# irYkHtdSFFsj3c0g4P4mhOzXnvUkGXzgrjteM5hlLy3bjSeZz9VMZADjiGqFGVPb
# 6euPcLLa9oynkoP5UXmFd/9PjWcgnfIbQu2MVlIyWhjvTGZKSGecVZmH5pWTJuBV
# xCbab1jYprRFpUIAMo94rgvRQRosomS1+GjGndFkX5++dTTlFSqpDLSGcEnPSGRx
# o9n+IldNiqh2vjN1bj60pLfmrHN/F+hsGTsDJlW+kfeyBXBkGArg1rDjN5ae7GvD
# UZK0N+OG0g==
# =jwOI
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 29 Jan 2025 10:56:49 EST
# gpg:                using RSA key AA1B48B0A22326A5A4C364CFC798DC741BEC319D
# gpg:                issuer "farosas@suse.de"
# gpg: Good signature from "Fabiano Rosas <farosas@suse.de>" [unknown]
# gpg:                 aka "Fabiano Almeida Rosas <fabiano.rosas@suse.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: AA1B 48B0 A223 26A5 A4C3  64CF C798 DC74 1BEC 319D

* tag 'migration-20250129-pull-request' of https://gitlab.com/farosas/qemu: (42 commits)
  migration: refactor ram_save_target_page functions
  migration: Trivial cleanup on JSON writer of vmstate_save()
  migration: Merge precopy/postcopy on switchover start
  migration: Always set DEVICE state
  migration: Cleanup qemu_savevm_state_complete_precopy()
  migration: Unwrap qemu_savevm_state_complete_precopy() in postcopy
  migration: Notify COMPLETE once for postcopy
  migration: Take BQL slightly longer in postcopy_start()
  migration: Drop cached migration state in migration_maybe_pause()
  migration: Adjust locking in migration_maybe_pause()
  migration: Adjust postcopy bandwidth during switchover
  migration: Synchronize all CPU states only for non-iterable dump
  migration: Drop inactivate_disk param in qemu_savevm_state_complete*
  migration: Avoid two src-downtime-end tracepoints for postcopy
  migration: Optimize postcopy on downtime by avoiding JSON writer
  migration: Do not construct JSON description if suppressed
  migration: Remove postcopy implications in should_send_vmdesc()
  migration: cpr-transfer documentation
  migration-test: cpr-transfer
  tests/qtest: assert qmp connected
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Stefan Hajnoczi 6 月之前
父節點
當前提交
9736ee382e
共有 56 個文件被更改,包括 1712 次插入和 385 次删除
  1. 1 1
      backends/hostmem-epc.c
  2. 1 1
      backends/hostmem-file.c
  3. 10 4
      backends/hostmem-memfd.c
  4. 1 1
      backends/hostmem-ram.c
  5. 10 41
      backends/hostmem-shm.c
  6. 182 2
      docs/devel/migration/CPR.rst
  7. 22 0
      hw/core/machine.c
  8. 10 0
      include/exec/memory.h
  9. 9 4
      include/exec/ram_addr.h
  10. 1 0
      include/hw/boards.h
  11. 33 0
      include/migration/cpr.h
  12. 7 0
      include/migration/misc.h
  13. 9 0
      include/migration/vmstate.h
  14. 1 0
      include/qemu/osdep.h
  15. 6 2
      meson.build
  16. 71 0
      migration/cpr-transfer.c
  17. 224 0
      migration/cpr.c
  18. 2 0
      migration/meson.build
  19. 266 82
      migration/migration.c
  20. 3 2
      migration/migration.h
  21. 6 2
      migration/options.c
  22. 80 4
      migration/qemu-file.c
  23. 2 0
      migration/qemu-file.h
  24. 18 49
      migration/ram.c
  25. 45 71
      migration/savevm.c
  26. 3 3
      migration/savevm.h
  27. 12 1
      migration/trace-events
  28. 24 0
      migration/vmstate-types.c
  29. 4 2
      migration/vmstate.c
  30. 47 4
      qapi/migration.json
  31. 34 0
      qemu-options.hx
  32. 7 0
      stubs/vmstate.c
  33. 2 2
      system/memory.c
  34. 129 21
      system/physmem.c
  35. 1 0
      system/trace-events
  36. 40 3
      system/vl.c
  37. 1 0
      tests/qemu-iotests/194.out
  38. 1 0
      tests/qemu-iotests/203.out
  39. 2 0
      tests/qemu-iotests/234.out
  40. 1 0
      tests/qemu-iotests/262.out
  41. 1 0
      tests/qemu-iotests/280.out
  42. 2 1
      tests/qtest/libqos/libqos.c
  43. 68 35
      tests/qtest/libqtest.c
  44. 22 2
      tests/qtest/libqtest.h
  45. 62 0
      tests/qtest/migration/cpr-tests.c
  46. 67 13
      tests/qtest/migration/framework.c
  47. 11 0
      tests/qtest/migration/framework.h
  48. 44 9
      tests/qtest/migration/migration-qmp.c
  49. 6 4
      tests/qtest/migration/migration-qmp.h
  50. 15 8
      tests/qtest/migration/migration-util.c
  51. 8 1
      tests/qtest/migration/misc-tests.c
  52. 3 3
      tests/qtest/migration/precopy-tests.c
  53. 4 4
      tests/qtest/virtio-net-failover.c
  54. 13 3
      util/memfd.c
  55. 52 0
      util/oslib-posix.c
  56. 6 0
      util/oslib-win32.c

+ 1 - 1
backends/hostmem-epc.c

@@ -36,7 +36,7 @@ sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 
 
     backend->aligned = true;
     backend->aligned = true;
     name = object_get_canonical_path(OBJECT(backend));
     name = object_get_canonical_path(OBJECT(backend));
-    ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
+    ram_flags = (backend->share ? RAM_SHARED : RAM_PRIVATE) | RAM_PROTECTED;
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
                                           backend->size, ram_flags, fd, 0, errp);
                                           backend->size, ram_flags, fd, 0, errp);
 }
 }

+ 1 - 1
backends/hostmem-file.c

@@ -82,7 +82,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 
 
     backend->aligned = true;
     backend->aligned = true;
     name = host_memory_backend_get_name(backend);
     name = host_memory_backend_get_name(backend);
-    ram_flags = backend->share ? RAM_SHARED : 0;
+    ram_flags = backend->share ? RAM_SHARED : RAM_PRIVATE;
     ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
     ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
     ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
     ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;

+ 10 - 4
backends/hostmem-memfd.c

@@ -17,6 +17,7 @@
 #include "qemu/module.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
 #include "qapi/error.h"
 #include "qom/object.h"
 #include "qom/object.h"
+#include "migration/cpr.h"
 
 
 OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD)
 OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD)
 
 
@@ -33,15 +34,19 @@ static bool
 memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 {
 {
     HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend);
     HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend);
-    g_autofree char *name = NULL;
+    g_autofree char *name = host_memory_backend_get_name(backend);
+    int fd = cpr_find_fd(name, 0);
     uint32_t ram_flags;
     uint32_t ram_flags;
-    int fd;
 
 
     if (!backend->size) {
     if (!backend->size) {
         error_setg(errp, "can't create backend with size 0");
         error_setg(errp, "can't create backend with size 0");
         return false;
         return false;
     }
     }
 
 
+    if (fd >= 0) {
+        goto have_fd;
+    }
+
     fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
     fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
                            m->hugetlb, m->hugetlbsize, m->seal ?
                            m->hugetlb, m->hugetlbsize, m->seal ?
                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
@@ -49,10 +54,11 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     if (fd == -1) {
     if (fd == -1) {
         return false;
         return false;
     }
     }
+    cpr_save_fd(name, 0, fd);
 
 
+have_fd:
     backend->aligned = true;
     backend->aligned = true;
-    name = host_memory_backend_get_name(backend);
-    ram_flags = backend->share ? RAM_SHARED : 0;
+    ram_flags = backend->share ? RAM_SHARED : RAM_PRIVATE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
     return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,

+ 1 - 1
backends/hostmem-ram.c

@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
     }
     }
 
 
     name = host_memory_backend_get_name(backend);
     name = host_memory_backend_get_name(backend);
-    ram_flags = backend->share ? RAM_SHARED : 0;
+    ram_flags = backend->share ? RAM_SHARED : RAM_PRIVATE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
     ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
     return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
     return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),

+ 10 - 41
backends/hostmem-shm.c

@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 #include "qemu/osdep.h"
 #include "system/hostmem.h"
 #include "system/hostmem.h"
 #include "qapi/error.h"
 #include "qapi/error.h"
+#include "migration/cpr.h"
 
 
 #define TYPE_MEMORY_BACKEND_SHM "memory-backend-shm"
 #define TYPE_MEMORY_BACKEND_SHM "memory-backend-shm"
 
 
@@ -25,11 +26,9 @@ struct HostMemoryBackendShm {
 static bool
 static bool
 shm_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 shm_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 {
 {
-    g_autoptr(GString) shm_name = g_string_new(NULL);
-    g_autofree char *backend_name = NULL;
+    g_autofree char *backend_name = host_memory_backend_get_name(backend);
     uint32_t ram_flags;
     uint32_t ram_flags;
-    int fd, oflag;
-    mode_t mode;
+    int fd = cpr_find_fd(backend_name, 0);
 
 
     if (!backend->size) {
     if (!backend->size) {
         error_setg(errp, "can't create shm backend with size 0");
         error_setg(errp, "can't create shm backend with size 0");
@@ -41,48 +40,18 @@ shm_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         return false;
         return false;
     }
     }
 
 
-    /*
-     * Let's use `mode = 0` because we don't want other processes to open our
-     * memory unless we share the file descriptor with them.
-     */
-    mode = 0;
-    oflag = O_RDWR | O_CREAT | O_EXCL;
-    backend_name = host_memory_backend_get_name(backend);
-
-    /*
-     * Some operating systems allow creating anonymous POSIX shared memory
-     * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not
-     * defined by POSIX, so let's create a unique name.
-     *
-     * From Linux's shm_open(3) man-page:
-     *   For  portable  use,  a shared  memory  object should be identified
-     *   by a name of the form /somename;"
-     */
-    g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%s", getpid(),
-                    backend_name);
-
-    fd = shm_open(shm_name->str, oflag, mode);
-    if (fd < 0) {
-        error_setg_errno(errp, errno,
-                         "failed to create POSIX shared memory");
-        return false;
+    if (fd >= 0) {
+        goto have_fd;
     }
     }
 
 
-    /*
-     * We have the file descriptor, so we no longer need to expose the
-     * POSIX shared memory object. However it will remain allocated as long as
-     * there are file descriptors pointing to it.
-     */
-    shm_unlink(shm_name->str);
-
-    if (ftruncate(fd, backend->size) == -1) {
-        error_setg_errno(errp, errno,
-                         "failed to resize POSIX shared memory to %" PRIu64,
-                         backend->size);
-        close(fd);
+    fd = qemu_shm_alloc(backend->size, errp);
+    if (fd < 0) {
         return false;
         return false;
     }
     }
+    cpr_save_fd(backend_name, 0, fd);
 
 
+have_fd:
+    /* Let's do the same as memory-backend-ram,share=on would do. */
     ram_flags = RAM_SHARED;
     ram_flags = RAM_SHARED;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
 
 

+ 182 - 2
docs/devel/migration/CPR.rst

@@ -5,7 +5,7 @@ CPR is the umbrella name for a set of migration modes in which the
 VM is migrated to a new QEMU instance on the same host.  It is
 VM is migrated to a new QEMU instance on the same host.  It is
 intended for use when the goal is to update host software components
 intended for use when the goal is to update host software components
 that run the VM, such as QEMU or even the host kernel.  At this time,
 that run the VM, such as QEMU or even the host kernel.  At this time,
-cpr-reboot is the only available mode.
+the cpr-reboot and cpr-transfer modes are available.
 
 
 Because QEMU is restarted on the same host, with access to the same
 Because QEMU is restarted on the same host, with access to the same
 local devices, CPR is allowed in certain cases where normal migration
 local devices, CPR is allowed in certain cases where normal migration
@@ -53,7 +53,7 @@ RAM is copied to the migration URI.
 Outgoing:
 Outgoing:
   * Set the migration mode parameter to ``cpr-reboot``.
   * Set the migration mode parameter to ``cpr-reboot``.
   * Set the ``x-ignore-shared`` capability if desired.
   * Set the ``x-ignore-shared`` capability if desired.
-  * Issue the ``migrate`` command.  It is recommended the the URI be a
+  * Issue the ``migrate`` command.  It is recommended the URI be a
     ``file`` type, but one can use other types such as ``exec``,
     ``file`` type, but one can use other types such as ``exec``,
     provided the command captures all the data from the outgoing side,
     provided the command captures all the data from the outgoing side,
     and provides all the data to the incoming side.
     and provides all the data to the incoming side.
@@ -145,3 +145,183 @@ Caveats
 
 
 cpr-reboot mode may not be used with postcopy, background-snapshot,
 cpr-reboot mode may not be used with postcopy, background-snapshot,
 or COLO.
 or COLO.
+
+cpr-transfer mode
+-----------------
+
+This mode allows the user to transfer a guest to a new QEMU instance
+on the same host with minimal guest pause time, by preserving guest
+RAM in place, albeit with new virtual addresses in new QEMU.  Devices
+and their pinned memory pages will also be preserved in a future QEMU
+release.
+
+The user starts new QEMU on the same host as old QEMU, with command-
+line arguments to create the same machine, plus the ``-incoming``
+option for the main migration channel, like normal live migration.
+In addition, the user adds a second ``-incoming`` option with channel
+type ``cpr``.  This CPR channel must support file descriptor transfer
+with SCM_RIGHTS, i.e. it must be a UNIX domain socket.
+
+To initiate CPR, the user issues a migrate command to old QEMU,
+adding a second migration channel of type ``cpr`` in the channels
+argument.  Old QEMU stops the VM, saves state to the migration
+channels, and enters the postmigrate state.  Execution resumes in
+new QEMU.
+
+New QEMU reads the CPR channel before opening a monitor, hence
+the CPR channel cannot be specified in the list of channels for a
+migrate-incoming command.  It may only be specified on the command
+line.
+
+Usage
+^^^^^
+
+Memory backend objects must have the ``share=on`` attribute.
+
+The VM must be started with the ``-machine aux-ram-share=on``
+option.  This causes implicit RAM blocks (those not described by
+a memory-backend object) to be allocated by mmap'ing a memfd.
+Examples include VGA and ROM.
+
+Outgoing:
+  * Set the migration mode parameter to ``cpr-transfer``.
+  * Issue the ``migrate`` command, containing a main channel and
+    a cpr channel.
+
+Incoming:
+  * Start new QEMU with two ``-incoming`` options.
+  * If the VM was running when the outgoing ``migrate`` command was
+    issued, then QEMU automatically resumes VM execution.
+
+Caveats
+^^^^^^^
+
+cpr-transfer mode may not be used with postcopy, background-snapshot,
+or COLO.
+
+memory-backend-epc is not supported.
+
+The main incoming migration channel address cannot be a file type.
+
+If the main incoming channel address is an inet socket, then the port
+cannot be 0 (meaning dynamically choose a port).
+
+When using ``-incoming defer``, you must issue the migrate command to
+old QEMU before issuing any monitor commands to new QEMU, because new
+QEMU blocks waiting to read from the cpr channel before starting its
+monitor, and old QEMU does not write to the channel until the migrate
+command is issued.  However, new QEMU does not open and read the
+main migration channel until you issue the ``migrate-incoming`` command.
+
+Example 1: incoming channel
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In these examples, we simply restart the same version of QEMU, but
+in a real scenario one would start new QEMU on the incoming side.
+Note that new QEMU does not print the monitor prompt until old QEMU
+has issued the migrate command.  The outgoing side uses QMP because
+HMP cannot specify a CPR channel.  Some QMP responses are omitted for
+brevity.
+
+::
+
+  Outgoing:                             Incoming:
+
+  # qemu-kvm -qmp stdio
+  -object memory-backend-file,id=ram0,size=4G,
+  mem-path=/dev/shm/ram0,share=on -m 4G
+  -machine memory-backend=ram0
+  -machine aux-ram-share=on
+  ...
+                                        # qemu-kvm -monitor stdio
+                                        -incoming tcp:0:44444
+                                        -incoming '{"channel-type": "cpr",
+                                          "addr": { "transport": "socket",
+                                          "type": "unix", "path": "cpr.sock"}}'
+                                        ...
+  {"execute":"qmp_capabilities"}
+
+  {"execute": "query-status"}
+  {"return": {"status": "running",
+              "running": true}}
+
+  {"execute":"migrate-set-parameters",
+   "arguments":{"mode":"cpr-transfer"}}
+
+  {"execute": "migrate", "arguments": { "channels": [
+    {"channel-type": "main",
+     "addr": { "transport": "socket", "type": "inet",
+               "host": "0", "port": "44444" }},
+    {"channel-type": "cpr",
+     "addr": { "transport": "socket", "type": "unix",
+               "path": "cpr.sock" }}]}}
+
+                                        QEMU 10.0.50 monitor
+                                        (qemu) info status
+                                        VM status: running
+
+  {"execute": "query-status"}
+  {"return": {"status": "postmigrate",
+              "running": false}}
+
+Example 2: incoming defer
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This example uses ``-incoming defer`` to hot plug a device before
+accepting the main migration channel.  Again note you must issue the
+migrate command to old QEMU before you can issue any monitor
+commands to new QEMU.
+
+
+::
+
+  Outgoing:                             Incoming:
+
+  # qemu-kvm -qmp stdio
+  -object memory-backend-file,id=ram0,size=4G,
+  mem-path=/dev/shm/ram0,share=on -m 4G
+  -machine memory-backend=ram0
+  -machine aux-ram-share=on
+  ...
+                                        # qemu-kvm -monitor stdio
+                                        -incoming defer
+                                        -incoming '{"channel-type": "cpr",
+                                          "addr": { "transport": "socket",
+                                          "type": "unix", "path": "cpr.sock"}}'
+                                        ...
+  {"execute":"qmp_capabilities"}
+
+  {"execute": "device_add",
+   "arguments": {"driver": "pcie-root-port"}}
+
+  {"execute":"migrate-set-parameters",
+   "arguments":{"mode":"cpr-transfer"}}
+
+  {"execute": "migrate", "arguments": { "channels": [
+    {"channel-type": "main",
+     "addr": { "transport": "socket", "type": "inet",
+               "host": "0", "port": "44444" }},
+    {"channel-type": "cpr",
+     "addr": { "transport": "socket", "type": "unix",
+               "path": "cpr.sock" }}]}}
+
+                                        QEMU 10.0.50 monitor
+                                        (qemu) info status
+                                        VM status: paused (inmigrate)
+                                        (qemu) device_add pcie-root-port
+                                        (qemu) migrate_incoming tcp:0:44444
+                                        (qemu) info status
+                                        VM status: running
+
+  {"execute": "query-status"}
+  {"return": {"status": "postmigrate",
+              "running": false}}
+
+Futures
+^^^^^^^
+
+cpr-transfer mode is based on a capability to transfer open file
+descriptors from old to new QEMU.  In the future, descriptors for
+vfio, iommufd, vhost, and char devices could be transferred,
+preserving those devices and their kernel state without interruption,
+even if they do not explicitly support live migration.

+ 22 - 0
hw/core/machine.c

@@ -457,6 +457,22 @@ static void machine_set_mem_merge(Object *obj, bool value, Error **errp)
     ms->mem_merge = value;
     ms->mem_merge = value;
 }
 }
 
 
+#ifdef CONFIG_POSIX
+static bool machine_get_aux_ram_share(Object *obj, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    return ms->aux_ram_share;
+}
+
+static void machine_set_aux_ram_share(Object *obj, bool value, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    ms->aux_ram_share = value;
+}
+#endif
+
 static bool machine_get_usb(Object *obj, Error **errp)
 static bool machine_get_usb(Object *obj, Error **errp)
 {
 {
     MachineState *ms = MACHINE(obj);
     MachineState *ms = MACHINE(obj);
@@ -1162,6 +1178,12 @@ static void machine_class_init(ObjectClass *oc, void *data)
     object_class_property_set_description(oc, "mem-merge",
     object_class_property_set_description(oc, "mem-merge",
         "Enable/disable memory merge support");
         "Enable/disable memory merge support");
 
 
+#ifdef CONFIG_POSIX
+    object_class_property_add_bool(oc, "aux-ram-share",
+                                   machine_get_aux_ram_share,
+                                   machine_set_aux_ram_share);
+#endif
+
     object_class_property_add_bool(oc, "usb",
     object_class_property_add_bool(oc, "usb",
         machine_get_usb, machine_set_usb);
         machine_get_usb, machine_set_usb);
     object_class_property_set_description(oc, "usb",
     object_class_property_set_description(oc, "usb",

+ 10 - 0
include/exec/memory.h

@@ -246,6 +246,16 @@ typedef struct IOMMUTLBEvent {
 /* RAM can be private that has kvm guest memfd backend */
 /* RAM can be private that has kvm guest memfd backend */
 #define RAM_GUEST_MEMFD   (1 << 12)
 #define RAM_GUEST_MEMFD   (1 << 12)
 
 
+/*
+ * In RAMBlock creation functions, if RAM_SHARED is 0 in the flags parameter,
+ * the implementation may still create a shared mapping if other conditions
+ * require it.  Callers who specifically want a private mapping, eg objects
+ * specified by the user, must pass RAM_PRIVATE.
+ * After RAMBlock creation, RAM_SHARED in the block's flags indicates whether
+ * the block is shared or private, and RAM_PRIVATE is omitted.
+ */
+#define RAM_PRIVATE (1 << 13)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
                                        hwaddr start, hwaddr end,

+ 9 - 4
include/exec/ram_addr.h

@@ -111,23 +111,30 @@ long qemu_maxrampagesize(void);
  *
  *
  * Parameters:
  * Parameters:
  *  @size: the size in bytes of the ram block
  *  @size: the size in bytes of the ram block
+ *  @max_size: the maximum size of the block after resizing
  *  @mr: the memory region where the ram block is
  *  @mr: the memory region where the ram block is
+ *  @resized: callback after calls to qemu_ram_resize
  *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
  *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
  *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
  *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
  *              RAM_READONLY_FD, RAM_GUEST_MEMFD
  *              RAM_READONLY_FD, RAM_GUEST_MEMFD
  *  @mem_path or @fd: specify the backing file or device
  *  @mem_path or @fd: specify the backing file or device
  *  @offset: Offset into target file
  *  @offset: Offset into target file
+ *  @grow: extend file if necessary (but an empty file is always extended).
  *  @errp: pointer to Error*, to store an error if it happens
  *  @errp: pointer to Error*, to store an error if it happens
  *
  *
  * Return:
  * Return:
  *  On success, return a pointer to the ram block.
  *  On success, return a pointer to the ram block.
  *  On failure, return NULL.
  *  On failure, return NULL.
  */
  */
+typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);
+
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    uint32_t ram_flags, const char *mem_path,
                                    uint32_t ram_flags, const char *mem_path,
                                    off_t offset, Error **errp);
                                    off_t offset, Error **errp);
-RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
+                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd, off_t offset,
                                  uint32_t ram_flags, int fd, off_t offset,
+                                 bool grow,
                                  Error **errp);
                                  Error **errp);
 
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
@@ -135,9 +142,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                          Error **errp);
                          Error **errp);
 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
-                                    void (*resized)(const char*,
-                                                    uint64_t length,
-                                                    void *host),
+                                    qemu_ram_resize_cb resized,
                                     MemoryRegion *mr, Error **errp);
                                     MemoryRegion *mr, Error **errp);
 void qemu_ram_free(RAMBlock *block);
 void qemu_ram_free(RAMBlock *block);
 
 

+ 1 - 0
include/hw/boards.h

@@ -410,6 +410,7 @@ struct MachineState {
     bool enable_graphics;
     bool enable_graphics;
     ConfidentialGuestSupport *cgs;
     ConfidentialGuestSupport *cgs;
     HostMemoryBackend *memdev;
     HostMemoryBackend *memdev;
+    bool aux_ram_share;
     /*
     /*
      * convenience alias to ram_memdev_id backend memory region
      * convenience alias to ram_memdev_id backend memory region
      * or to numa container memory region
      * or to numa container memory region

+ 33 - 0
include/migration/cpr.h

@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021, 2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIGRATION_CPR_H
+#define MIGRATION_CPR_H
+
+#include "qapi/qapi-types-migration.h"
+
+#define MIG_MODE_NONE           -1
+
+#define QEMU_CPR_FILE_MAGIC     0x51435052
+#define QEMU_CPR_FILE_VERSION   0x00000001
+
+void cpr_save_fd(const char *name, int id, int fd);
+void cpr_delete_fd(const char *name, int id);
+int cpr_find_fd(const char *name, int id);
+
+MigMode cpr_get_incoming_mode(void);
+void cpr_set_incoming_mode(MigMode mode);
+
+int cpr_state_save(MigrationChannel *channel, Error **errp);
+int cpr_state_load(MigrationChannel *channel, Error **errp);
+void cpr_state_close(void);
+struct QIOChannel *cpr_state_ioc(void);
+
+QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
+QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
+
+#endif

+ 7 - 0
include/migration/misc.h

@@ -108,4 +108,11 @@ bool migration_in_bg_snapshot(void);
 bool migration_block_activate(Error **errp);
 bool migration_block_activate(Error **errp);
 bool migration_block_inactivate(void);
 bool migration_block_inactivate(void);
 
 
+/* True if @uri starts with a syntactically valid URI prefix */
+bool migrate_is_uri(const char *uri);
+
+/* Parse @uri and return @channel, returning true on success */
+bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
+                       Error **errp);
+
 #endif
 #endif

+ 9 - 0
include/migration/vmstate.h

@@ -230,6 +230,7 @@ extern const VMStateInfo vmstate_info_uint8;
 extern const VMStateInfo vmstate_info_uint16;
 extern const VMStateInfo vmstate_info_uint16;
 extern const VMStateInfo vmstate_info_uint32;
 extern const VMStateInfo vmstate_info_uint32;
 extern const VMStateInfo vmstate_info_uint64;
 extern const VMStateInfo vmstate_info_uint64;
+extern const VMStateInfo vmstate_info_fd;
 
 
 /** Put this in the stream when migrating a null pointer.*/
 /** Put this in the stream when migrating a null pointer.*/
 #define VMS_NULLPTR_MARKER (0x30U) /* '0' */
 #define VMS_NULLPTR_MARKER (0x30U) /* '0' */
@@ -902,6 +903,9 @@ extern const VMStateInfo vmstate_info_qlist;
 #define VMSTATE_UINT64_V(_f, _s, _v)                                  \
 #define VMSTATE_UINT64_V(_f, _s, _v)                                  \
     VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
     VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
 
 
+#define VMSTATE_FD_V(_f, _s, _v)                                  \
+    VMSTATE_SINGLE(_f, _s, _v, vmstate_info_fd, int32_t)
+
 #ifdef CONFIG_LINUX
 #ifdef CONFIG_LINUX
 
 
 #define VMSTATE_U8_V(_f, _s, _v)                                   \
 #define VMSTATE_U8_V(_f, _s, _v)                                   \
@@ -936,6 +940,9 @@ extern const VMStateInfo vmstate_info_qlist;
 #define VMSTATE_UINT64(_f, _s)                                        \
 #define VMSTATE_UINT64(_f, _s)                                        \
     VMSTATE_UINT64_V(_f, _s, 0)
     VMSTATE_UINT64_V(_f, _s, 0)
 
 
+#define VMSTATE_FD(_f, _s)                                            \
+    VMSTATE_FD_V(_f, _s, 0)
+
 #ifdef CONFIG_LINUX
 #ifdef CONFIG_LINUX
 
 
 #define VMSTATE_U8(_f, _s)                                         \
 #define VMSTATE_U8(_f, _s)                                         \
@@ -1009,6 +1016,8 @@ extern const VMStateInfo vmstate_info_qlist;
 #define VMSTATE_UINT64_TEST(_f, _s, _t)                                  \
 #define VMSTATE_UINT64_TEST(_f, _s, _t)                                  \
     VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint64, uint64_t)
     VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint64, uint64_t)
 
 
+#define VMSTATE_FD_TEST(_f, _s, _t)                                            \
+    VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_fd, int32_t)
 
 
 #define VMSTATE_TIMER_PTR_TEST(_f, _s, _test)                             \
 #define VMSTATE_TIMER_PTR_TEST(_f, _s, _test)                             \
     VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
     VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)

+ 1 - 0
include/qemu/osdep.h

@@ -509,6 +509,7 @@ int qemu_daemon(int nochdir, int noclose);
 void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
 void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
                           bool noreserve);
                           bool noreserve);
 void qemu_anon_ram_free(void *ptr, size_t size);
 void qemu_anon_ram_free(void *ptr, size_t size);
+int qemu_shm_alloc(size_t size, Error **errp);
 
 
 #ifdef _WIN32
 #ifdef _WIN32
 #define HAVE_CHARDEV_SERIAL 1
 #define HAVE_CHARDEV_SERIAL 1

+ 6 - 2
meson.build

@@ -3696,9 +3696,13 @@ libqemuutil = static_library('qemuutil',
                              build_by_default: false,
                              build_by_default: false,
                              sources: util_ss.sources() + stub_ss.sources() + genh,
                              sources: util_ss.sources() + stub_ss.sources() + genh,
                              dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc])
                              dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc])
+qemuutil_deps = [event_loop_base]
+if host_os != 'windows'
+  qemuutil_deps += [rt]
+endif
 qemuutil = declare_dependency(link_with: libqemuutil,
 qemuutil = declare_dependency(link_with: libqemuutil,
                               sources: genh + version_res,
                               sources: genh + version_res,
-                              dependencies: [event_loop_base])
+                              dependencies: qemuutil_deps)
 
 
 if have_system or have_user
 if have_system or have_user
   decodetree = generator(find_program('scripts/decodetree.py'),
   decodetree = generator(find_program('scripts/decodetree.py'),
@@ -4357,7 +4361,7 @@ if have_tools
   subdir('contrib/elf2dmp')
   subdir('contrib/elf2dmp')
 
 
   executable('qemu-edid', files('qemu-edid.c', 'hw/display/edid-generate.c'),
   executable('qemu-edid', files('qemu-edid.c', 'hw/display/edid-generate.c'),
-             dependencies: qemuutil,
+             dependencies: [qemuutil, rt],
              install: true)
              install: true)
 
 
   if have_vhost_user
   if have_vhost_user

+ 71 - 0
migration/cpr-transfer.c

@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2022, 2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "io/channel-file.h"
+#include "io/channel-socket.h"
+#include "io/net-listener.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+/*
+ * Open the outgoing side of the CPR state channel.
+ *
+ * @channel must describe a unix-domain socket address; any other address
+ * sets @errp and returns NULL.  Otherwise a new socket channel is connected
+ * with qio_channel_socket_connect_sync() (which receives @errp), named
+ * "cpr-out", and wrapped in a QEMUFile opened for output.
+ *
+ * Returns the new QEMUFile, or NULL on failure.
+ */
+QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp)
+{
+    MigrationAddress *maddr = channel->addr;
+    g_autoptr(QIOChannelSocket) sock = NULL;
+    SocketAddress *unix_addr;
+
+    if (maddr->transport != MIGRATION_ADDRESS_TYPE_SOCKET ||
+        maddr->u.socket.type != SOCKET_ADDRESS_TYPE_UNIX) {
+        error_setg(errp, "bad cpr channel address; must be unix");
+        return NULL;
+    }
+
+    unix_addr = &maddr->u.socket;
+    sock = qio_channel_socket_new();
+    if (qio_channel_socket_connect_sync(sock, unix_addr, errp) < 0) {
+        return NULL;
+    }
+
+    trace_cpr_transfer_output(unix_addr->u.q_unix.path);
+    qio_channel_set_name(QIO_CHANNEL(sock), "cpr-out");
+    return qemu_file_new_output(QIO_CHANNEL(sock));
+}
+
+/*
+ * Open the incoming side of the CPR state channel.
+ *
+ * @channel must describe a unix-domain socket address; any other address
+ * sets @errp and returns NULL.  Otherwise a listener named
+ * "cpr-socket-listener" is opened on that address, the function waits in
+ * qio_net_listener_wait_client() for one client, names the resulting
+ * channel "cpr-in", and wraps it in a QEMUFile opened for input.
+ *
+ * Returns the new QEMUFile, or NULL on failure.
+ */
+QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp)
+{
+    MigrationAddress *maddr = channel->addr;
+    g_autoptr(QIOChannelSocket) client = NULL;
+    g_autoptr(QIONetListener) lsnr = NULL;
+    SocketAddress *unix_addr;
+
+    if (maddr->transport != MIGRATION_ADDRESS_TYPE_SOCKET ||
+        maddr->u.socket.type != SOCKET_ADDRESS_TYPE_UNIX) {
+        error_setg(errp, "bad cpr channel socket type; must be unix");
+        return NULL;
+    }
+
+    unix_addr = &maddr->u.socket;
+    lsnr = qio_net_listener_new();
+    qio_net_listener_set_name(lsnr, "cpr-socket-listener");
+    if (qio_net_listener_open_sync(lsnr, unix_addr, 1, errp) < 0) {
+        return NULL;
+    }
+
+    client = qio_net_listener_wait_client(lsnr);
+    trace_cpr_transfer_input(unix_addr->u.q_unix.path);
+    qio_channel_set_name(QIO_CHANNEL(client), "cpr-in");
+    return qemu_file_new_input(QIO_CHANNEL(client));
+}

+ 224 - 0
migration/cpr.c

@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2021-2024 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/misc.h"
+#include "migration/options.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/vmstate.h"
+#include "system/runstate.h"
+#include "trace.h"
+
+/*************************************************************************/
+/* cpr state container for all information to be saved. */
+
+/* List head type for the saved CprFd entries. */
+typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
+
+/* Container for everything described by vmstate_cpr_state. */
+typedef struct CprState {
+    CprFdList fds;      /* saved file descriptors, see cpr_save_fd() */
+} CprState;
+
+/*
+ * The single file-scope instance: the fd helpers operate on its list, and
+ * cpr_state_save()/cpr_state_load() pass it to the vmstate code.
+ */
+static CprState cpr_state;
+
+/****************************************************************************/
+
+/*
+ * One saved file descriptor, looked up by the pair (name, id).  Entries are
+ * linked on cpr_state.fds and described for the stream by vmstate_cpr_fd.
+ */
+typedef struct CprFd {
+    char *name;             /* key string; cpr_save_fd() stores a copy */
+    unsigned int namelen;   /* strlen(name) + 1, i.e. including the NUL */
+    int id;                 /* second half of the lookup key */
+    int fd;                 /* the descriptor value being recorded */
+    QLIST_ENTRY(CprFd) next;
+} CprFd;
+
+/*
+ * Description of one CprFd for the vmstate code.  Fields are listed in the
+ * order namelen, name, id, fd; the name buffer field takes namelen as its
+ * size field, and fd uses VMSTATE_FD rather than a plain integer field.
+ */
+static const VMStateDescription vmstate_cpr_fd = {
+    .name = "cpr fd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(namelen, CprFd),
+        VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen),
+        VMSTATE_INT32(id, CprFd),
+        VMSTATE_FD(fd, CprFd),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Record @fd under the key (@name, @id) by inserting a new entry at the
+ * head of cpr_state.fds.  @name is duplicated.  No check is made for an
+ * existing entry with the same key.
+ */
+void cpr_save_fd(const char *name, int id, int fd)
+{
+    CprFd *entry;
+
+    trace_cpr_save_fd(name, id, fd);
+
+    entry = g_new0(CprFd, 1);
+    entry->fd = fd;
+    entry->id = id;
+    entry->namelen = strlen(name) + 1;  /* length includes the NUL */
+    entry->name = g_strdup(name);
+    QLIST_INSERT_HEAD(&cpr_state.fds, entry, next);
+}
+
+/*
+ * Return the first entry on @head whose name equals @name and whose id
+ * equals @id, or NULL if there is none.
+ */
+static CprFd *find_fd(CprFdList *head, const char *name, int id)
+{
+    CprFd *cur;
+
+    QLIST_FOREACH(cur, head, next) {
+        if (strcmp(cur->name, name) != 0) {
+            continue;
+        }
+        if (cur->id == id) {
+            return cur;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Remove the entry keyed by (@name, @id) from cpr_state.fds, if present,
+ * and free it together with its name.  The fd itself is not touched here.
+ * The trace point fires whether or not an entry was found.
+ */
+void cpr_delete_fd(const char *name, int id)
+{
+    CprFd *victim;
+
+    victim = find_fd(&cpr_state.fds, name, id);
+    if (victim != NULL) {
+        QLIST_REMOVE(victim, next);
+        g_free(victim->name);
+        g_free(victim);
+    }
+
+    trace_cpr_delete_fd(name, id);
+}
+
+/*
+ * Look up the fd recorded under (@name, @id).
+ *
+ * Returns the recorded fd, or -1 if no such entry exists.
+ */
+int cpr_find_fd(const char *name, int id)
+{
+    int fd = -1;
+    CprFd *match = find_fd(&cpr_state.fds, name, id);
+
+    if (match) {
+        fd = match->fd;
+    }
+
+    trace_cpr_find_fd(name, id, fd);
+    return fd;
+}
+/*************************************************************************/
+/* Name given to the top-level CPR state description. */
+#define CPR_STATE "CprState"
+
+/*
+ * Top-level description of CprState: a single field, the list of CprFd
+ * entries, each described by vmstate_cpr_fd.
+ */
+static const VMStateDescription vmstate_cpr_state = {
+    .name = CPR_STATE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
+        VMSTATE_END_OF_LIST()
+    }
+};
+/*************************************************************************/
+
+/*
+ * The open CPR state stream.  Set at the end of a successful
+ * cpr_state_save() or cpr_state_load(); closed and reset to NULL by
+ * cpr_state_close().
+ */
+static QEMUFile *cpr_state_file;
+
+/*
+ * Return the I/O channel of the open CPR state stream.
+ *
+ * NOTE(review): cpr_state_file is passed on without a NULL check; confirm
+ * callers only use this after a successful save or load.
+ */
+QIOChannel *cpr_state_ioc(void)
+{
+    return qemu_file_get_ioc(cpr_state_file);
+}
+
+/* Mode recorded for the incoming side; MIG_MODE_NONE until set. */
+static MigMode incoming_mode = MIG_MODE_NONE;
+
+/* Return the mode last stored with cpr_set_incoming_mode(). */
+MigMode cpr_get_incoming_mode(void)
+{
+    return incoming_mode;
+}
+
+/* Store @mode as the incoming mode reported by cpr_get_incoming_mode(). */
+void cpr_set_incoming_mode(MigMode mode)
+{
+    incoming_mode = mode;
+}
+
+/*
+ * Send cpr_state to the target over @channel.
+ *
+ * Does nothing and returns 0 unless the migration mode is cpr-transfer.
+ * Otherwise opens the channel with cpr_transfer_output(), writes the magic
+ * and version words followed by the vmstate for cpr_state, and keeps the
+ * stream in cpr_state_file.
+ *
+ * Returns 0 on success, -1 if the channel could not be opened, or the
+ * non-zero vmstate_save_state() result (with @errp set and the stream
+ * closed) if saving fails.
+ */
+int cpr_state_save(MigrationChannel *channel, Error **errp)
+{
+    MigMode mode = migrate_mode();
+    QEMUFile *out;
+    int rc;
+
+    trace_cpr_state_save(MigMode_str(mode));
+
+    if (mode != MIG_MODE_CPR_TRANSFER) {
+        return 0;
+    }
+
+    out = cpr_transfer_output(channel, errp);
+    if (out == NULL) {
+        return -1;
+    }
+
+    /* Stream header: magic first, then version */
+    qemu_put_be32(out, QEMU_CPR_FILE_MAGIC);
+    qemu_put_be32(out, QEMU_CPR_FILE_VERSION);
+
+    rc = vmstate_save_state(out, &vmstate_cpr_state, &cpr_state, 0);
+    if (rc != 0) {
+        error_setg(errp, "vmstate_save_state error %d", rc);
+        qemu_fclose(out);
+        return rc;
+    }
+
+    /*
+     * Flush, then shut down only the write direction: the stream stays
+     * open so that a HUP event can later be detected when the other end
+     * closes.
+     */
+    qemu_fflush(out);
+    qio_channel_shutdown(qemu_file_get_ioc(out), QIO_CHANNEL_SHUTDOWN_WRITE,
+                         NULL);
+    cpr_state_file = out;
+    return 0;
+}
+
+/*
+ * Load cpr_state from the source over @channel.
+ *
+ * A NULL @channel means there is nothing to load: returns 0 without side
+ * effects.  Otherwise the incoming mode is set to cpr-transfer, the stream
+ * is opened with cpr_transfer_input(), its magic and version words are
+ * validated, and the vmstate for cpr_state is loaded.
+ *
+ * On success the stream is kept in cpr_state_file so that the caller
+ * decides when to close it.
+ *
+ * Returns 0 on success.  On failure returns -1 (channel could not be
+ * opened), -EINVAL (bad magic), -ENOTSUP (bad version), or the non-zero
+ * vmstate_load_state() result; in the last three cases @errp is set and
+ * the stream is closed.
+ */
+int cpr_state_load(MigrationChannel *channel, Error **errp)
+{
+    int ret;
+    uint32_t v;
+    QEMUFile *f;
+    MigMode mode = MIG_MODE_CPR_TRANSFER;
+
+    if (!channel) {
+        return 0;
+    }
+
+    cpr_set_incoming_mode(mode);
+    f = cpr_transfer_input(channel, errp);
+    if (!f) {
+        return -1;
+    }
+
+    trace_cpr_state_load(MigMode_str(mode));
+
+    v = qemu_get_be32(f);
+    if (v != QEMU_CPR_FILE_MAGIC) {
+        error_setg(errp, "Not a migration stream (bad magic %x)", v);
+        qemu_fclose(f);
+        return -EINVAL;
+    }
+    v = qemu_get_be32(f);
+    if (v != QEMU_CPR_FILE_VERSION) {
+        /* v is unsigned: print it as such so large values are not negative */
+        error_setg(errp, "Unsupported migration stream version %u", v);
+        qemu_fclose(f);
+        return -ENOTSUP;
+    }
+
+    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
+    if (ret) {
+        error_setg(errp, "vmstate_load_state error %d", ret);
+        qemu_fclose(f);
+        return ret;
+    }
+
+    /*
+     * Let the caller decide when to close the socket (and generate a HUP event
+     * for the sending side).
+     */
+    cpr_state_file = f;
+
+    return ret;
+}
+
+/*
+ * Close the CPR state stream, if one is open, and forget it.  Safe to call
+ * when no stream is open.
+ */
+void cpr_state_close(void)
+{
+    QEMUFile *f = cpr_state_file;
+
+    if (!f) {
+        return;
+    }
+
+    qemu_fclose(f);
+    cpr_state_file = NULL;
+}

+ 2 - 0
migration/meson.build

@@ -14,6 +14,8 @@ system_ss.add(files(
   'block-active.c',
   'block-active.c',
   'channel.c',
   'channel.c',
   'channel-block.c',
   'channel-block.c',
+  'cpr.c',
+  'cpr-transfer.c',
   'cpu-throttle.c',
   'cpu-throttle.c',
   'dirtyrate.c',
   'dirtyrate.c',
   'exec.c',
   'exec.c',

+ 266 - 82
migration/migration.c

@@ -14,6 +14,7 @@
  */
  */
 
 
 #include "qemu/osdep.h"
 #include "qemu/osdep.h"
+#include "qemu/ctype.h"
 #include "qemu/cutils.h"
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/main-loop.h"
@@ -27,6 +28,7 @@
 #include "system/cpu-throttle.h"
 #include "system/cpu-throttle.h"
 #include "rdma.h"
 #include "rdma.h"
 #include "ram.h"
 #include "ram.h"
+#include "migration/cpr.h"
 #include "migration/global_state.h"
 #include "migration/global_state.h"
 #include "migration/misc.h"
 #include "migration/misc.h"
 #include "migration.h"
 #include "migration.h"
@@ -75,6 +77,7 @@
 static NotifierWithReturnList migration_state_notifiers[] = {
 static NotifierWithReturnList migration_state_notifiers[] = {
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
     NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
+    NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
 };
 };
 
 
 /* Messages sent on the return path from destination to source */
 /* Messages sent on the return path from destination to source */
@@ -102,12 +105,11 @@ static MigrationIncomingState *current_incoming;
 static GSList *migration_blockers[MIG_MODE__MAX];
 static GSList *migration_blockers[MIG_MODE__MAX];
 
 
 static bool migration_object_check(MigrationState *ms, Error **errp);
 static bool migration_object_check(MigrationState *ms, Error **errp);
-static int migration_maybe_pause(MigrationState *s,
-                                 int *current_active_state,
-                                 int new_state);
+static bool migration_switchover_start(MigrationState *s, Error **errp);
 static void migrate_fd_cancel(MigrationState *s);
 static void migrate_fd_cancel(MigrationState *s);
 static bool close_return_path_on_source(MigrationState *s);
 static bool close_return_path_on_source(MigrationState *s);
 static void migration_completion_end(MigrationState *s);
 static void migration_completion_end(MigrationState *s);
+static void migrate_hup_delete(MigrationState *s);
 
 
 static void migration_downtime_start(MigrationState *s)
 static void migration_downtime_start(MigrationState *s)
 {
 {
@@ -125,9 +127,19 @@ static void migration_downtime_end(MigrationState *s)
      */
      */
     if (!s->downtime) {
     if (!s->downtime) {
         s->downtime = now - s->downtime_start;
         s->downtime = now - s->downtime_start;
+        trace_vmstate_downtime_checkpoint("src-downtime-end");
+    }
+}
+
+static void precopy_notify_complete(void)
+{
+    Error *local_err = NULL;
+
+    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
+        error_report_err(local_err);
     }
     }
 
 
-    trace_vmstate_downtime_checkpoint("src-downtime-end");
+    trace_migration_precopy_complete();
 }
 }
 
 
 static bool migration_needs_multiple_sockets(void)
 static bool migration_needs_multiple_sockets(void)
@@ -218,6 +230,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
         return false;
         return false;
     }
     }
 
 
+    if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
+        addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+        error_setg(errp, "Migration requires streamable transport (eg unix)");
+        return false;
+    }
+
     return true;
     return true;
 }
 }
 
 
@@ -433,6 +451,7 @@ void migration_incoming_state_destroy(void)
         mis->postcopy_qemufile_dst = NULL;
         mis->postcopy_qemufile_dst = NULL;
     }
     }
 
 
+    cpr_set_incoming_mode(MIG_MODE_NONE);
     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
 }
 }
 
 
@@ -586,6 +605,16 @@ void migrate_add_address(SocketAddress *address)
                       QAPI_CLONE(SocketAddress, address));
                       QAPI_CLONE(SocketAddress, address));
 }
 }
 
 
+/*
+ * Return true if @uri consists of zero or more characters accepted by
+ * qemu_isalpha() followed by ':'.  Note that an empty prefix (a string that
+ * starts with ':') is accepted; a string without ':' is not.
+ */
+bool migrate_is_uri(const char *uri)
+{
+    const char *p;
+
+    for (p = uri; *p != '\0' && *p != ':'; p++) {
+        if (!qemu_isalpha(*p)) {
+            return false;
+        }
+    }
+    return *p == ':';
+}
+
 bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
 bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
                        Error **errp)
                        Error **errp)
 {
 {
@@ -683,7 +712,8 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
     if (channels) {
     if (channels) {
         /* To verify that Migrate channel list has only item */
         /* To verify that Migrate channel list has only item */
         if (channels->next) {
         if (channels->next) {
-            error_setg(errp, "Channel list has more than one entries");
+            error_setg(errp, "Channel list must have only one entry, "
+                             "for type 'main'");
             return;
             return;
         }
         }
         addr = channels->value->addr;
         addr = channels->value->addr;
@@ -734,6 +764,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
     } else {
     } else {
         error_setg(errp, "unknown migration protocol: %s", uri);
         error_setg(errp, "unknown migration protocol: %s", uri);
     }
     }
+
+    /* Close cpr socket to tell source that we are listening */
+    cpr_state_close();
 }
 }
 
 
 static void process_incoming_migration_bh(void *opaque)
 static void process_incoming_migration_bh(void *opaque)
@@ -1397,6 +1430,11 @@ void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
     }
     }
 }
 }
 
 
+/*
+ * Release the JSON writer held in @s->vmdesc, if any, and reset the pointer
+ * to NULL (g_clear_pointer() skips the free call when it is already NULL).
+ */
+static void migration_cleanup_json_writer(MigrationState *s)
+{
+    g_clear_pointer(&s->vmdesc, json_writer_free);
+}
+
 static void migrate_fd_cleanup(MigrationState *s)
 static void migrate_fd_cleanup(MigrationState *s)
 {
 {
     MigrationEventType type;
     MigrationEventType type;
@@ -1404,12 +1442,14 @@ static void migrate_fd_cleanup(MigrationState *s)
 
 
     trace_migrate_fd_cleanup();
     trace_migrate_fd_cleanup();
 
 
+    migration_cleanup_json_writer(s);
+
     g_free(s->hostname);
     g_free(s->hostname);
     s->hostname = NULL;
     s->hostname = NULL;
-    json_writer_free(s->vmdesc);
-    s->vmdesc = NULL;
 
 
     qemu_savevm_state_cleanup();
     qemu_savevm_state_cleanup();
+    cpr_state_close();
+    migrate_hup_delete(s);
 
 
     close_return_path_on_source(s);
     close_return_path_on_source(s);
 
 
@@ -1521,6 +1561,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error)
 static void migrate_fd_cancel(MigrationState *s)
 static void migrate_fd_cancel(MigrationState *s)
 {
 {
     int old_state ;
     int old_state ;
+    bool setup = (s->state == MIGRATION_STATUS_SETUP);
 
 
     trace_migrate_fd_cancel();
     trace_migrate_fd_cancel();
 
 
@@ -1555,6 +1596,17 @@ static void migrate_fd_cancel(MigrationState *s)
             }
             }
         }
         }
     }
     }
+
+    /*
+     * If qmp_migrate_finish has not been called, then there is no path that
+     * will complete the cancellation.  Do it now.
+     */
+    if (setup && !s->to_dst_file) {
+        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
+                          MIGRATION_STATUS_CANCELLED);
+        cpr_state_close();
+        migrate_hup_delete(s);
+    }
 }
 }
 
 
 void migration_add_notifier_mode(NotifierWithReturn *notify,
 void migration_add_notifier_mode(NotifierWithReturn *notify,
@@ -1652,7 +1704,9 @@ bool migration_thread_is_self(void)
 
 
 bool migrate_mode_is_cpr(MigrationState *s)
 bool migrate_mode_is_cpr(MigrationState *s)
 {
 {
-    return s->parameters.mode == MIG_MODE_CPR_REBOOT;
+    MigMode mode = s->parameters.mode;
+    return mode == MIG_MODE_CPR_REBOOT ||
+           mode == MIG_MODE_CPR_TRANSFER;
 }
 }
 
 
 int migrate_init(MigrationState *s, Error **errp)
 int migrate_init(MigrationState *s, Error **errp)
@@ -1681,7 +1735,10 @@ int migrate_init(MigrationState *s, Error **errp)
     s->migration_thread_running = false;
     s->migration_thread_running = false;
     error_free(s->error);
     error_free(s->error);
     s->error = NULL;
     s->error = NULL;
-    s->vmdesc = NULL;
+
+    if (should_send_vmdesc()) {
+        s->vmdesc = json_writer_new(false);
+    }
 
 
     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
 
 
@@ -2033,6 +2090,40 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
     return true;
     return true;
 }
 }
 
 
+static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
+                               Error **errp);
+
+/*
+ * Create a G_IO_HUP watch on @ioc, remember it in @s->hup_source, and
+ * attach it with @cb/@opaque as its callback.  The source is attached with
+ * a NULL context argument.  Any previous value of @s->hup_source is
+ * overwritten without being released.
+ */
+static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb,
+                            void *opaque)
+{
+    GSource *watch = qio_channel_create_watch(ioc, G_IO_HUP);
+
+    s->hup_source = watch;
+    g_source_set_callback(watch, cb, opaque, NULL);
+    g_source_attach(watch, NULL);
+}
+
+/*
+ * Destroy and release the watch stored in @s->hup_source, if any, and clear
+ * the pointer.  Does nothing when no watch is installed.
+ */
+static void migrate_hup_delete(MigrationState *s)
+{
+    GSource *watch = s->hup_source;
+
+    if (!watch) {
+        return;
+    }
+
+    g_source_destroy(watch);
+    g_source_unref(watch);
+    s->hup_source = NULL;
+}
+
+/*
+ * Watch callback installed by qmp_migrate() in cpr-transfer mode; it is
+ * registered for G_IO_HUP on the cpr state channel.  @opaque is the cloned
+ * MigrationAddress made at registration time and is freed here.
+ *
+ * Always returns G_SOURCE_REMOVE.
+ *
+ * NOTE(review): qmp_migrate_finish() is called with a NULL errp, so any
+ * error it reports only through errp is dropped; confirm this is intended.
+ */
+static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
+                                      GIOCondition cond,
+                                      void *opaque)
+{
+    MigrationAddress *addr = opaque;
+
+    /* Resume is never requested on this path */
+    qmp_migrate_finish(addr, false, NULL);
+
+    /* Tear down the cpr state stream and the watch, then free the clone */
+    cpr_state_close();
+    migrate_hup_delete(migrate_get_current());
+    qapi_free_MigrationAddress(addr);
+    return G_SOURCE_REMOVE;
+}
+
 void qmp_migrate(const char *uri, bool has_channels,
 void qmp_migrate(const char *uri, bool has_channels,
                  MigrationChannelList *channels, bool has_detach, bool detach,
                  MigrationChannelList *channels, bool has_detach, bool detach,
                  bool has_resume, bool resume, Error **errp)
                  bool has_resume, bool resume, Error **errp)
@@ -2042,6 +2133,8 @@ void qmp_migrate(const char *uri, bool has_channels,
     MigrationState *s = migrate_get_current();
     MigrationState *s = migrate_get_current();
     g_autoptr(MigrationChannel) channel = NULL;
     g_autoptr(MigrationChannel) channel = NULL;
     MigrationAddress *addr = NULL;
     MigrationAddress *addr = NULL;
+    MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL };
+    MigrationChannel *cpr_channel = NULL;
 
 
     /*
     /*
      * Having preliminary checks for uri and channel
      * Having preliminary checks for uri and channel
@@ -2052,12 +2145,22 @@ void qmp_migrate(const char *uri, bool has_channels,
     }
     }
 
 
     if (channels) {
     if (channels) {
-        /* To verify that Migrate channel list has only item */
-        if (channels->next) {
-            error_setg(errp, "Channel list has more than one entries");
+        for ( ; channels; channels = channels->next) {
+            MigrationChannelType type = channels->value->channel_type;
+
+            if (channelv[type]) {
+                error_setg(errp, "Channel list has more than one %s entry",
+                           MigrationChannelType_str(type));
+                return;
+            }
+            channelv[type] = channels->value;
+        }
+        cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR];
+        /*
+         * A list without a 'main' entry leaves that slot NULL.  Test the
+         * slot before dereferencing it, so that the check below reports the
+         * missing entry instead of crashing.  addr is still NULL here.
+         */
+        if (channelv[MIGRATION_CHANNEL_TYPE_MAIN]) {
+            addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr;
+        }
+        if (!addr) {
+            error_setg(errp, "Channel list has no main entry");
             return;
             return;
         }
         }
-        addr = channels->value->addr;
     }
     }
 
 
     if (uri) {
     if (uri) {
@@ -2073,12 +2176,52 @@ void qmp_migrate(const char *uri, bool has_channels,
         return;
         return;
     }
     }
 
 
+    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
+        error_setg(errp, "missing 'cpr' migration channel");
+        return;
+    }
+
     resume_requested = has_resume && resume;
     resume_requested = has_resume && resume;
     if (!migrate_prepare(s, resume_requested, errp)) {
     if (!migrate_prepare(s, resume_requested, errp)) {
         /* Error detected, put into errp */
         /* Error detected, put into errp */
         return;
         return;
     }
     }
 
 
+    if (cpr_state_save(cpr_channel, &local_err)) {
+        goto out;
+    }
+
+    /*
+     * For cpr-transfer, the target may not be listening yet on the migration
+     * channel, because first it must finish cpr_state_load.  The target tells
+     * us it is listening by closing the cpr-state socket.  Wait for that HUP
+     * event before connecting in qmp_migrate_finish.
+     *
+     * The HUP could occur because the target fails while reading CPR state,
+     * in which case the target will not listen for the incoming migration
+     * connection, so qmp_migrate_finish will fail to connect, and then recover.
+     */
+    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
+        migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb,
+                        QAPI_CLONE(MigrationAddress, addr));
+
+    } else {
+        qmp_migrate_finish(addr, resume_requested, errp);
+    }
+
+out:
+    if (local_err) {
+        migrate_fd_error(s, local_err);
+        error_propagate(errp, local_err);
+    }
+}
+
+static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
+                               Error **errp)
+{
+    MigrationState *s = migrate_get_current();
+    Error *local_err = NULL;
+
     if (!resume_requested) {
     if (!resume_requested) {
         if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
         if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
             return;
             return;
@@ -2495,8 +2638,14 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     int ret;
     int ret;
     QIOChannelBuffer *bioc;
     QIOChannelBuffer *bioc;
     QEMUFile *fb;
     QEMUFile *fb;
-    uint64_t bandwidth = migrate_max_postcopy_bandwidth();
-    int cur_state = MIGRATION_STATUS_ACTIVE;
+
+    /*
+     * Now we're 100% sure to switch to postcopy, so JSON writer won't be
+     * useful anymore.  Free the resources early if it is there.  Clearing
+     * the vmdesc also means any follow up vmstate_save()s will start to
+     * skip all JSON operations, which can shrink postcopy downtime.
+     */
+    migration_cleanup_json_writer(ms);
 
 
     if (migrate_postcopy_preempt()) {
     if (migrate_postcopy_preempt()) {
         migration_wait_main_channel(ms);
         migration_wait_main_channel(ms);
@@ -2508,11 +2657,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         }
         }
     }
     }
 
 
-    if (!migrate_pause_before_switchover()) {
-        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
-                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
-    }
-
     trace_postcopy_start();
     trace_postcopy_start();
     bql_lock();
     bql_lock();
     trace_postcopy_start_set_run();
     trace_postcopy_start_set_run();
@@ -2523,16 +2667,7 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         goto fail;
         goto fail;
     }
     }
 
 
-    ret = migration_maybe_pause(ms, &cur_state,
-                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()",
-                         __func__);
-        goto fail;
-    }
-
-    if (!migration_block_inactivate()) {
-        error_setg(errp, "%s: Failed in bdrv_inactivate_all()", __func__);
+    if (!migration_switchover_start(ms, errp)) {
         goto fail;
         goto fail;
     }
     }
 
 
@@ -2540,7 +2675,11 @@ static int postcopy_start(MigrationState *ms, Error **errp)
      * Cause any non-postcopiable, but iterative devices to
      * Cause any non-postcopiable, but iterative devices to
      * send out their final data.
      * send out their final data.
      */
      */
-    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
+    ret = qemu_savevm_state_complete_precopy_iterable(ms->to_dst_file, true);
+    if (ret) {
+        error_setg(errp, "Postcopy save non-postcopiable iterables failed");
+        goto fail;
+    }
 
 
     /*
     /*
      * in Finish migrate and with the io-lock held everything should
      * in Finish migrate and with the io-lock held everything should
@@ -2552,12 +2691,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         ram_postcopy_send_discard_bitmap(ms);
         ram_postcopy_send_discard_bitmap(ms);
     }
     }
 
 
-    /*
-     * send rest of state - note things that are doing postcopy
-     * will notice we're in POSTCOPY_ACTIVE and not actually
-     * wrap their state up here
-     */
-    migration_rate_set(bandwidth);
     if (migrate_postcopy_ram()) {
     if (migrate_postcopy_ram()) {
         /* Ping just for debugging, helps line traces up */
         /* Ping just for debugging, helps line traces up */
         qemu_savevm_send_ping(ms->to_dst_file, 2);
         qemu_savevm_send_ping(ms->to_dst_file, 2);
@@ -2585,7 +2718,12 @@ static int postcopy_start(MigrationState *ms, Error **errp)
      */
      */
     qemu_savevm_send_postcopy_listen(fb);
     qemu_savevm_send_postcopy_listen(fb);
 
 
-    qemu_savevm_state_complete_precopy(fb, false, false);
+    ret = qemu_savevm_state_complete_precopy_non_iterable(fb, true);
+    if (ret) {
+        error_setg(errp, "Postcopy save non-iterable device states failed");
+        goto fail_closefb;
+    }
+
     if (migrate_postcopy_ram()) {
     if (migrate_postcopy_ram()) {
         qemu_savevm_send_ping(fb, 3);
         qemu_savevm_send_ping(fb, 3);
     }
     }
@@ -2619,8 +2757,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
 
 
     migration_downtime_end(ms);
     migration_downtime_end(ms);
 
 
-    bql_unlock();
-
     if (migrate_postcopy_ram()) {
     if (migrate_postcopy_ram()) {
         /*
         /*
          * Although this ping is just for debug, it could potentially be
          * Although this ping is just for debug, it could potentially be
@@ -2636,11 +2772,22 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     ret = qemu_file_get_error(ms->to_dst_file);
     ret = qemu_file_get_error(ms->to_dst_file);
     if (ret) {
     if (ret) {
         error_setg_errno(errp, -ret, "postcopy_start: Migration stream error");
         error_setg_errno(errp, -ret, "postcopy_start: Migration stream error");
-        bql_lock();
         goto fail;
         goto fail;
     }
     }
     trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
     trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
 
 
+    /*
+     * Now postcopy officially started, switch to postcopy bandwidth that
+     * user specified.
+     */
+    migration_rate_set(migrate_max_postcopy_bandwidth());
+
+    /* Now, switchover looks all fine, switching to postcopy-active */
+    migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE,
+                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
+    bql_unlock();
+
     return ret;
     return ret;
 
 
 fail_closefb:
 fail_closefb:
@@ -2655,16 +2802,39 @@ fail:
 }
 }
 
 
 /**
 /**
- * migration_maybe_pause: Pause if required to by
- * migrate_pause_before_switchover called with the BQL locked
- * Returns: 0 on success
+ * @migration_switchover_prepare: Start VM switchover procedure
+ *
+ * @s: The migration state object pointer
+ *
+ * Prepares for the switchover, depending on "pause-before-switchover"
+ * capability.
+ *
+ * If cap set, state machine goes like:
+ *   [postcopy-]active -> pre-switchover -> device
+ *
+ * If cap not set:
+ *   [postcopy-]active -> device
+ *
+ * Returns: true on success, false on interruptions.
  */
  */
-static int migration_maybe_pause(MigrationState *s,
-                                 int *current_active_state,
-                                 int new_state)
+static bool migration_switchover_prepare(MigrationState *s)
 {
 {
+    /* Concurrent cancellation?  Quit */
+    if (s->state == MIGRATION_STATUS_CANCELLING) {
+        return false;
+    }
+
+    /*
+     * No matter precopy or postcopy, since we still hold BQL it must not
+     * change concurrently to CANCELLING, so it must be either ACTIVE or
+     * POSTCOPY_ACTIVE.
+     */
+    assert(migration_is_active());
+
+    /* If the pre stage not requested, directly switch to DEVICE */
     if (!migrate_pause_before_switchover()) {
     if (!migrate_pause_before_switchover()) {
-        return 0;
+        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE);
+        return true;
     }
     }
 
 
     /* Since leaving this state is not atomic with posting the semaphore
     /* Since leaving this state is not atomic with posting the semaphore
@@ -2677,28 +2847,53 @@ static int migration_maybe_pause(MigrationState *s,
         /* This block intentionally left blank */
         /* This block intentionally left blank */
     }
     }
 
 
+    /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
+    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
+    bql_unlock();
+
+    qemu_sem_wait(&s->pause_sem);
+
+    bql_lock();
     /*
     /*
-     * If the migration is cancelled when it is in the completion phase,
-     * the migration state is set to MIGRATION_STATUS_CANCELLING.
-     * So we don't need to wait a semaphore, otherwise we would always
-     * wait for the 'pause_sem' semaphore.
+     * After the BQL is released and retaken, the state can be CANCELLING
+     * if a cancel happened during sem_wait().  Only change the state if
+     * it's still pre-switchover.
      */
      */
-    if (s->state != MIGRATION_STATUS_CANCELLING) {
-        bql_unlock();
-        migrate_set_state(&s->state, *current_active_state,
-                          MIGRATION_STATUS_PRE_SWITCHOVER);
-        qemu_sem_wait(&s->pause_sem);
-        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
-                          new_state);
-        *current_active_state = new_state;
-        bql_lock();
+    migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
+                      MIGRATION_STATUS_DEVICE);
+
+    return s->state == MIGRATION_STATUS_DEVICE;
+}
+
+static bool migration_switchover_start(MigrationState *s, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!migration_switchover_prepare(s)) {
+        error_setg(errp, "Switchover is interrupted");
+        return false;
     }
     }
 
 
-    return s->state == new_state ? 0 : -EINVAL;
+    /* Inactivate disks except in COLO */
+    if (!migrate_colo()) {
+        /*
+         * Inactivate before sending QEMU_VM_EOF so that the
+         * bdrv_activate_all() on the other end won't fail.
+         */
+        if (!migration_block_inactivate()) {
+            error_setg(errp, "Block inactivate failed during switchover");
+            return false;
+        }
+    }
+
+    migration_rate_set(RATE_LIMIT_DISABLED);
+
+    precopy_notify_complete();
+
+    return true;
 }
 }
 
 
-static int migration_completion_precopy(MigrationState *s,
-                                        int *current_active_state)
+static int migration_completion_precopy(MigrationState *s)
 {
 {
     int ret;
     int ret;
 
 
@@ -2711,17 +2906,12 @@ static int migration_completion_precopy(MigrationState *s,
         }
         }
     }
     }
 
 
-    ret = migration_maybe_pause(s, current_active_state,
-                                MIGRATION_STATUS_DEVICE);
-    if (ret < 0) {
+    if (!migration_switchover_start(s, NULL)) {
+        ret = -EFAULT;
         goto out_unlock;
         goto out_unlock;
     }
     }
 
 
-    migration_rate_set(RATE_LIMIT_DISABLED);
-
-    /* Inactivate disks except in COLO */
-    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
-                                             !migrate_colo());
+    ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false);
 out_unlock:
 out_unlock:
     bql_unlock();
     bql_unlock();
     return ret;
     return ret;
@@ -2755,11 +2945,10 @@ static void migration_completion_postcopy(MigrationState *s)
 static void migration_completion(MigrationState *s)
 static void migration_completion(MigrationState *s)
 {
 {
     int ret = 0;
     int ret = 0;
-    int current_active_state = s->state;
     Error *local_err = NULL;
     Error *local_err = NULL;
 
 
     if (s->state == MIGRATION_STATUS_ACTIVE) {
     if (s->state == MIGRATION_STATUS_ACTIVE) {
-        ret = migration_completion_precopy(s, &current_active_state);
+        ret = migration_completion_precopy(s);
     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
         migration_completion_postcopy(s);
         migration_completion_postcopy(s);
     } else {
     } else {
@@ -2799,8 +2988,7 @@ fail:
         error_free(local_err);
         error_free(local_err);
     }
     }
 
 
-    migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_FAILED);
+    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
 }
 }
 
 
 /**
 /**
@@ -3597,12 +3785,8 @@ static void *bg_migration_thread(void *opaque)
     if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
     if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
         goto fail;
         goto fail;
     }
     }
-    /*
-     * Put vCPUs in sync with shadow context structures, then
-     * save their state to channel-buffer along with devices.
-     */
-    cpu_synchronize_all_states();
-    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
+
+    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false)) {
         goto fail;
         goto fail;
     }
     }
     /*
     /*

+ 3 - 2
migration/migration.h

@@ -468,6 +468,8 @@ struct MigrationState {
     bool switchover_acked;
     bool switchover_acked;
     /* Is this a rdma migration */
     /* Is this a rdma migration */
     bool rdma_migration;
     bool rdma_migration;
+
+    GSource *hup_source;
 };
 };
 
 
 void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
 void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
@@ -519,8 +521,6 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm,
                                       Error **errp);
                                       Error **errp);
 
 
 void migrate_add_address(SocketAddress *address);
 void migrate_add_address(SocketAddress *address);
-bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
-                       Error **errp);
 int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
 int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
 
 
 #define qemu_ram_foreach_block \
 #define qemu_ram_foreach_block \
@@ -552,6 +552,7 @@ void migration_bitmap_sync_precopy(bool last_stage);
 
 
 /* migration/block-dirty-bitmap.c */
 /* migration/block-dirty-bitmap.c */
 void dirty_bitmap_mig_init(void);
 void dirty_bitmap_mig_init(void);
+bool should_send_vmdesc(void);
 
 
 /* migration/block-active.c */
 /* migration/block-active.c */
 void migration_block_active_setup(bool active);
 void migration_block_active_setup(bool active);

+ 6 - 2
migration/options.c

@@ -22,6 +22,7 @@
 #include "qapi/qmp/qnull.h"
 #include "qapi/qmp/qnull.h"
 #include "system/runstate.h"
 #include "system/runstate.h"
 #include "migration/colo.h"
 #include "migration/colo.h"
+#include "migration/cpr.h"
 #include "migration/misc.h"
 #include "migration/misc.h"
 #include "migration.h"
 #include "migration.h"
 #include "migration-stats.h"
 #include "migration-stats.h"
@@ -745,8 +746,11 @@ uint64_t migrate_max_postcopy_bandwidth(void)
 
 
 MigMode migrate_mode(void)
 MigMode migrate_mode(void)
 {
 {
-    MigrationState *s = migrate_get_current();
-    MigMode mode = s->parameters.mode;
+    MigMode mode = cpr_get_incoming_mode();
+
+    if (mode == MIG_MODE_NONE) {
+        mode = migrate_get_current()->parameters.mode;
+    }
 
 
     assert(mode >= 0 && mode < MIG_MODE__MAX);
     assert(mode >= 0 && mode < MIG_MODE__MAX);
     return mode;
     return mode;

+ 80 - 4
migration/qemu-file.c

@@ -37,6 +37,11 @@
 #define IO_BUF_SIZE 32768
 #define IO_BUF_SIZE 32768
 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
 
 
+typedef struct FdEntry {
+    QTAILQ_ENTRY(FdEntry) entry;
+    int fd;
+} FdEntry;
+
 struct QEMUFile {
 struct QEMUFile {
     QIOChannel *ioc;
     QIOChannel *ioc;
     bool is_writable;
     bool is_writable;
@@ -51,6 +56,9 @@ struct QEMUFile {
 
 
     int last_error;
     int last_error;
     Error *last_error_obj;
     Error *last_error_obj;
+
+    bool can_pass_fd;
+    QTAILQ_HEAD(, FdEntry) fds;
 };
 };
 
 
 /*
 /*
@@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
     object_ref(ioc);
     object_ref(ioc);
     f->ioc = ioc;
     f->ioc = ioc;
     f->is_writable = is_writable;
     f->is_writable = is_writable;
+    f->can_pass_fd = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
+    QTAILQ_INIT(&f->fds);
 
 
     return f;
     return f;
 }
 }
@@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
     int len;
     int len;
     int pending;
     int pending;
     Error *local_error = NULL;
     Error *local_error = NULL;
+    g_autofree int *fds = NULL;
+    size_t nfd = 0;
+    int **pfds = f->can_pass_fd ? &fds : NULL;
+    size_t *pnfd = f->can_pass_fd ? &nfd : NULL;
 
 
     assert(!qemu_file_is_writable(f));
     assert(!qemu_file_is_writable(f));
 
 
@@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
     }
     }
 
 
     do {
     do {
-        len = qio_channel_read(f->ioc,
-                               (char *)f->buf + pending,
-                               IO_BUF_SIZE - pending,
-                               &local_error);
+        struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
+        len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
+                                     &local_error);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(f->ioc, G_IO_IN);
                 qio_channel_yield(f->ioc, G_IO_IN);
@@ -348,9 +361,66 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
         qemu_file_set_error_obj(f, len, local_error);
         qemu_file_set_error_obj(f, len, local_error);
     }
     }
 
 
+    for (int i = 0; i < nfd; i++) {
+        FdEntry *fde = g_new0(FdEntry, 1);
+        fde->fd = fds[i];
+        QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
+    }
+
     return len;
     return len;
 }
 }
 
 
+int qemu_file_put_fd(QEMUFile *f, int fd)
+{
+    int ret = 0;
+    QIOChannel *ioc = qemu_file_get_ioc(f);
+    Error *err = NULL;
+    struct iovec iov = { (void *)" ", 1 };
+
+    /*
+     * Send a dummy byte so qemu_fill_buffer on the receiving side does not
+     * fail with a len=0 error.  Flush first to maintain ordering wrt other
+     * data.
+     */
+
+    qemu_fflush(f);
+    if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
+        error_report_err(error_copy(err));
+        qemu_file_set_error_obj(f, -EIO, err);
+        ret = -1;
+    }
+    trace_qemu_file_put_fd(f->ioc->name, fd, ret);
+    return ret;
+}
+
+int qemu_file_get_fd(QEMUFile *f)
+{
+    int fd = -1;
+    FdEntry *fde;
+
+    if (!f->can_pass_fd) {
+        Error *err = NULL;
+        error_setg(&err, "%s does not support fd passing", f->ioc->name);
+        error_report_err(error_copy(err));
+        qemu_file_set_error_obj(f, -EIO, err);
+        goto out;
+    }
+
+    /* Force the dummy byte and its fd passenger to appear. */
+    qemu_peek_byte(f, 0);
+
+    fde = QTAILQ_FIRST(&f->fds);
+    if (fde) {
+        qemu_get_byte(f);       /* Drop the dummy byte */
+        fd = fde->fd;
+        QTAILQ_REMOVE(&f->fds, fde, entry);
+        g_free(fde);
+    }
+out:
+    trace_qemu_file_get_fd(f->ioc->name, fd);
+    return fd;
+}
+
 /** Closes the file
 /** Closes the file
  *
  *
  * Returns negative error value if any error happened on previous operations or
  * Returns negative error value if any error happened on previous operations or
@@ -361,11 +431,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
  */
  */
 int qemu_fclose(QEMUFile *f)
 int qemu_fclose(QEMUFile *f)
 {
 {
+    FdEntry *fde, *next;
     int ret = qemu_fflush(f);
     int ret = qemu_fflush(f);
     int ret2 = qio_channel_close(f->ioc, NULL);
     int ret2 = qio_channel_close(f->ioc, NULL);
     if (ret >= 0) {
     if (ret >= 0) {
         ret = ret2;
         ret = ret2;
     }
     }
+    QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
+        warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
+        close(fde->fd);
+        g_free(fde);
+    }
     g_clear_pointer(&f->ioc, object_unref);
     g_clear_pointer(&f->ioc, object_unref);
     error_free(f->last_error_obj);
     error_free(f->last_error_obj);
     g_free(f);
     g_free(f);

+ 2 - 0
migration/qemu-file.h

@@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
                           off_t pos);
                           off_t pos);
 
 
 QIOChannel *qemu_file_get_ioc(QEMUFile *file);
 QIOChannel *qemu_file_get_ioc(QEMUFile *file);
+int qemu_file_put_fd(QEMUFile *f, int fd);
+int qemu_file_get_fd(QEMUFile *f);
 
 
 #endif
 #endif

+ 18 - 49
migration/ram.c

@@ -195,7 +195,9 @@ static bool postcopy_preempt_active(void)
 
 
 bool migrate_ram_is_ignored(RAMBlock *block)
 bool migrate_ram_is_ignored(RAMBlock *block)
 {
 {
+    MigMode mode = migrate_mode();
     return !qemu_ram_is_migratable(block) ||
     return !qemu_ram_is_migratable(block) ||
+           mode == MIG_MODE_CPR_TRANSFER ||
            (migrate_ignore_shared() && qemu_ram_is_shared(block)
            (migrate_ignore_shared() && qemu_ram_is_shared(block)
                                     && qemu_ram_is_named_file(block));
                                     && qemu_ram_is_named_file(block));
 }
 }
@@ -446,13 +448,6 @@ void ram_transferred_add(uint64_t bytes)
     }
     }
 }
 }
 
 
-struct MigrationOps {
-    int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss);
-};
-typedef struct MigrationOps MigrationOps;
-
-MigrationOps *migration_ops;
-
 static int ram_save_host_page_urgent(PageSearchStatus *pss);
 static int ram_save_host_page_urgent(PageSearchStatus *pss);
 
 
 /* NOTE: page is the PFN not real ram_addr_t. */
 /* NOTE: page is the PFN not real ram_addr_t. */
@@ -1958,53 +1953,34 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len,
 }
 }
 
 
 /**
 /**
- * ram_save_target_page_legacy: save one target page
- *
- * Returns the number of pages written
+ * ram_save_target_page: save one target page to the precopy thread
+ * OR to multifd workers.
  *
  *
  * @rs: current RAM state
  * @rs: current RAM state
  * @pss: data about the page we want to send
  * @pss: data about the page we want to send
  */
  */
-static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss)
+static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
 {
 {
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
     int res;
     int res;
 
 
-    if (control_save_page(pss, offset, &res)) {
-        return res;
+    if (!migrate_multifd()
+        || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
+        if (save_zero_page(rs, pss, offset)) {
+            return 1;
+        }
     }
     }
 
 
-    if (save_zero_page(rs, pss, offset)) {
-        return 1;
+    if (migrate_multifd()) {
+        RAMBlock *block = pss->block;
+        return ram_save_multifd_page(block, offset);
     }
     }
 
 
-    return ram_save_page(rs, pss);
-}
-
-/**
- * ram_save_target_page_multifd: send one target page to multifd workers
- *
- * Returns 1 if the page was queued, -1 otherwise.
- *
- * @rs: current RAM state
- * @pss: data about the page we want to send
- */
-static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss)
-{
-    RAMBlock *block = pss->block;
-    ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
-
-    /*
-     * While using multifd live migration, we still need to handle zero
-     * page checking on the migration main thread.
-     */
-    if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) {
-        if (save_zero_page(rs, pss, offset)) {
-            return 1;
-        }
+    if (control_save_page(pss, offset, &res)) {
+        return res;
     }
     }
 
 
-    return ram_save_multifd_page(block, offset);
+    return ram_save_page(rs, pss);
 }
 }
 
 
 /* Should be called before sending a host page */
 /* Should be called before sending a host page */
@@ -2093,7 +2069,7 @@ static int ram_save_host_page_urgent(PageSearchStatus *pss)
 
 
         if (page_dirty) {
         if (page_dirty) {
             /* Be strict to return code; it must be 1, or what else? */
             /* Be strict to return code; it must be 1, or what else? */
-            if (migration_ops->ram_save_target_page(rs, pss) != 1) {
+            if (ram_save_target_page(rs, pss) != 1) {
                 error_report_once("%s: ram_save_target_page failed", __func__);
                 error_report_once("%s: ram_save_target_page failed", __func__);
                 ret = -1;
                 ret = -1;
                 goto out;
                 goto out;
@@ -2162,7 +2138,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
             if (preempt_active) {
             if (preempt_active) {
                 qemu_mutex_unlock(&rs->bitmap_mutex);
                 qemu_mutex_unlock(&rs->bitmap_mutex);
             }
             }
-            tmppages = migration_ops->ram_save_target_page(rs, pss);
+            tmppages = ram_save_target_page(rs, pss);
             if (tmppages >= 0) {
             if (tmppages >= 0) {
                 pages += tmppages;
                 pages += tmppages;
                 /*
                 /*
@@ -2360,8 +2336,6 @@ static void ram_save_cleanup(void *opaque)
     xbzrle_cleanup();
     xbzrle_cleanup();
     multifd_ram_save_cleanup();
     multifd_ram_save_cleanup();
     ram_state_cleanup(rsp);
     ram_state_cleanup(rsp);
-    g_free(migration_ops);
-    migration_ops = NULL;
 }
 }
 
 
 static void ram_state_reset(RAMState *rs)
 static void ram_state_reset(RAMState *rs)
@@ -3027,13 +3001,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
         return ret;
         return ret;
     }
     }
 
 
-    migration_ops = g_malloc0(sizeof(MigrationOps));
-
     if (migrate_multifd()) {
     if (migrate_multifd()) {
         multifd_ram_save_setup();
         multifd_ram_save_setup();
-        migration_ops->ram_save_target_page = ram_save_target_page_multifd;
-    } else {
-        migration_ops->ram_save_target_page = ram_save_target_page_legacy;
     }
     }
 
 
     /*
     /*

+ 45 - 71
migration/savevm.c

@@ -1231,8 +1231,7 @@ void qemu_savevm_non_migratable_list(strList **reasons)
 void qemu_savevm_state_header(QEMUFile *f)
 void qemu_savevm_state_header(QEMUFile *f)
 {
 {
     MigrationState *s = migrate_get_current();
     MigrationState *s = migrate_get_current();
-
-    s->vmdesc = json_writer_new(false);
+    JSONWriter *vmdesc = s->vmdesc;
 
 
     trace_savevm_state_header();
     trace_savevm_state_header();
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1241,16 +1240,21 @@ void qemu_savevm_state_header(QEMUFile *f)
     if (s->send_configuration) {
     if (s->send_configuration) {
         qemu_put_byte(f, QEMU_VM_CONFIGURATION);
         qemu_put_byte(f, QEMU_VM_CONFIGURATION);
 
 
-        /*
-         * This starts the main json object and is paired with the
-         * json_writer_end_object in
-         * qemu_savevm_state_complete_precopy_non_iterable
-         */
-        json_writer_start_object(s->vmdesc, NULL);
+        if (vmdesc) {
+            /*
+             * This starts the main json object and is paired with the
+             * json_writer_end_object in
+             * qemu_savevm_state_complete_precopy_non_iterable
+             */
+            json_writer_start_object(vmdesc, NULL);
+            json_writer_start_object(vmdesc, "configuration");
+        }
+
+        vmstate_save_state(f, &vmstate_configuration, &savevm_state, vmdesc);
 
 
-        json_writer_start_object(s->vmdesc, "configuration");
-        vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc);
-        json_writer_end_object(s->vmdesc);
+        if (vmdesc) {
+            json_writer_end_object(vmdesc);
+        }
     }
     }
 }
 }
 
 
@@ -1296,16 +1300,19 @@ int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
 {
 {
     ERRP_GUARD();
     ERRP_GUARD();
     MigrationState *ms = migrate_get_current();
     MigrationState *ms = migrate_get_current();
+    JSONWriter *vmdesc = ms->vmdesc;
     SaveStateEntry *se;
     SaveStateEntry *se;
     int ret = 0;
     int ret = 0;
 
 
-    json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
-    json_writer_start_array(ms->vmdesc, "devices");
+    if (vmdesc) {
+        json_writer_int64(vmdesc, "page_size", qemu_target_page_size());
+        json_writer_start_array(vmdesc, "devices");
+    }
 
 
     trace_savevm_state_setup();
     trace_savevm_state_setup();
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->vmsd && se->vmsd->early_setup) {
         if (se->vmsd && se->vmsd->early_setup) {
-            ret = vmstate_save(f, se, ms->vmdesc, errp);
+            ret = vmstate_save(f, se, vmdesc, errp);
             if (ret) {
             if (ret) {
                 migrate_set_error(ms, *errp);
                 migrate_set_error(ms, *errp);
                 qemu_file_set_error(f, ret);
                 qemu_file_set_error(f, ret);
@@ -1424,11 +1431,11 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
     return all_finished;
     return all_finished;
 }
 }
 
 
-static bool should_send_vmdesc(void)
+bool should_send_vmdesc(void)
 {
 {
     MachineState *machine = MACHINE(qdev_get_machine());
     MachineState *machine = MACHINE(qdev_get_machine());
-    bool in_postcopy = migration_in_postcopy();
-    return !machine->suppress_vmdesc && !in_postcopy;
+
+    return !machine->suppress_vmdesc;
 }
 }
 
 
 /*
 /*
@@ -1470,7 +1477,6 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
     qemu_fflush(f);
     qemu_fflush(f);
 }
 }
 
 
-static
 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
 {
 {
     int64_t start_ts_each, end_ts_each;
     int64_t start_ts_each, end_ts_each;
@@ -1514,8 +1520,7 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
 }
 }
 
 
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
-                                                    bool in_postcopy,
-                                                    bool inactivate_disks)
+                                                    bool in_postcopy)
 {
 {
     MigrationState *ms = migrate_get_current();
     MigrationState *ms = migrate_get_current();
     int64_t start_ts_each, end_ts_each;
     int64_t start_ts_each, end_ts_each;
@@ -1525,6 +1530,9 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
     Error *local_err = NULL;
     Error *local_err = NULL;
     int ret;
     int ret;
 
 
+    /* Synchronize CPU states before saving non-iterable state */
+    cpu_synchronize_all_states();
+
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->vmsd && se->vmsd->early_setup) {
         if (se->vmsd && se->vmsd->early_setup) {
             /* Already saved during qemu_savevm_state_setup(). */
             /* Already saved during qemu_savevm_state_setup(). */
@@ -1546,77 +1554,42 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                     end_ts_each - start_ts_each);
                                     end_ts_each - start_ts_each);
     }
     }
 
 
-    if (inactivate_disks) {
-        /*
-         * Inactivate before sending QEMU_VM_EOF so that the
-         * bdrv_activate_all() on the other end won't fail.
-         */
-        if (!migration_block_inactivate()) {
-            error_setg(&local_err, "%s: bdrv_inactivate_all() failed",
-                       __func__);
-            migrate_set_error(ms, local_err);
-            error_report_err(local_err);
-            qemu_file_set_error(f, -EFAULT);
-            return ret;
-        }
-    }
     if (!in_postcopy) {
     if (!in_postcopy) {
         /* Postcopy stream will still be going */
         /* Postcopy stream will still be going */
         qemu_put_byte(f, QEMU_VM_EOF);
         qemu_put_byte(f, QEMU_VM_EOF);
-    }
 
 
-    json_writer_end_array(vmdesc);
-    json_writer_end_object(vmdesc);
-    vmdesc_len = strlen(json_writer_get(vmdesc));
+        if (vmdesc) {
+            json_writer_end_array(vmdesc);
+            json_writer_end_object(vmdesc);
+            vmdesc_len = strlen(json_writer_get(vmdesc));
 
 
-    if (should_send_vmdesc()) {
-        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
-        qemu_put_be32(f, vmdesc_len);
-        qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
+            qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
+            qemu_put_be32(f, vmdesc_len);
+            qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
+        }
     }
     }
 
 
-    /* Free it now to detect any inconsistencies. */
-    json_writer_free(vmdesc);
-    ms->vmdesc = NULL;
-
     trace_vmstate_downtime_checkpoint("src-non-iterable-saved");
     trace_vmstate_downtime_checkpoint("src-non-iterable-saved");
 
 
     return 0;
     return 0;
 }
 }
 
 
-int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
-                                       bool inactivate_disks)
+int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
 {
 {
     int ret;
     int ret;
-    Error *local_err = NULL;
-    bool in_postcopy = migration_in_postcopy();
 
 
-    if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
-        error_report_err(local_err);
+    ret = qemu_savevm_state_complete_precopy_iterable(f, false);
+    if (ret) {
+        return ret;
     }
     }
 
 
-    trace_savevm_state_complete_precopy();
-
-    cpu_synchronize_all_states();
-
-    if (!in_postcopy || iterable_only) {
-        ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy);
+    if (!iterable_only) {
+        ret = qemu_savevm_state_complete_precopy_non_iterable(f, false);
         if (ret) {
         if (ret) {
             return ret;
             return ret;
         }
         }
     }
     }
 
 
-    if (iterable_only) {
-        goto flush;
-    }
-
-    ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy,
-                                                          inactivate_disks);
-    if (ret) {
-        return ret;
-    }
-
-flush:
     return qemu_fflush(f);
     return qemu_fflush(f);
 }
 }
 
 
@@ -1714,7 +1687,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 
 
     ret = qemu_file_get_error(f);
     ret = qemu_file_get_error(f);
     if (ret == 0) {
     if (ret == 0) {
-        qemu_savevm_state_complete_precopy(f, false, false);
+        qemu_savevm_state_complete_precopy(f, false);
         ret = qemu_file_get_error(f);
         ret = qemu_file_get_error(f);
     }
     }
     if (ret != 0) {
     if (ret != 0) {
@@ -1740,7 +1713,7 @@ cleanup:
 void qemu_savevm_live_state(QEMUFile *f)
 void qemu_savevm_live_state(QEMUFile *f)
 {
 {
     /* save QEMU_VM_SECTION_END section */
     /* save QEMU_VM_SECTION_END section */
-    qemu_savevm_state_complete_precopy(f, true, false);
+    qemu_savevm_state_complete_precopy(f, true);
     qemu_put_byte(f, QEMU_VM_EOF);
     qemu_put_byte(f, QEMU_VM_EOF);
 }
 }
 
 
@@ -2965,6 +2938,7 @@ int qemu_loadvm_state(QEMUFile *f)
         return ret;
         return ret;
     }
     }
 
 
+    /* When reaching here, it must be precopy */
     if (ret == 0) {
     if (ret == 0) {
         ret = qemu_file_get_error(f);
         ret = qemu_file_get_error(f);
     }
     }

+ 3 - 3
migration/savevm.h

@@ -39,12 +39,12 @@ void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
 void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
-int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
-                                       bool inactivate_disks);
+int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only);
 void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
 void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
                                      uint64_t *can_postcopy);
                                      uint64_t *can_postcopy);
 void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
 void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
                                         uint64_t *can_postcopy);
                                         uint64_t *can_postcopy);
+int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
@@ -68,6 +68,6 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 int qemu_load_device_state(QEMUFile *f);
 int qemu_load_device_state(QEMUFile *f);
 int qemu_loadvm_approve_switchover(void);
 int qemu_loadvm_approve_switchover(void);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
-        bool in_postcopy, bool inactivate_disks);
+        bool in_postcopy);
 
 
 #endif
 #endif

+ 12 - 1
migration/trace-events

@@ -44,7 +44,6 @@ savevm_state_resume_prepare(void) ""
 savevm_state_header(void) ""
 savevm_state_header(void) ""
 savevm_state_iterate(void) ""
 savevm_state_iterate(void) ""
 savevm_state_cleanup(void) ""
 savevm_state_cleanup(void) ""
-savevm_state_complete_precopy(void) ""
 vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
 vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
 vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
 vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
 vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64
 vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64
@@ -88,6 +87,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
 
 
 # qemu-file.c
 # qemu-file.c
 qemu_file_fclose(void) ""
 qemu_file_fclose(void) ""
+qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
+qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
 
 
 # ram.c
 # ram.c
 get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
 get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
@@ -193,6 +194,7 @@ migrate_transferred(uint64_t transferred, uint64_t time_spent, uint64_t bandwidt
 process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
 process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
 process_incoming_migration_co_postcopy_end_main(void) ""
 process_incoming_migration_co_postcopy_end_main(void) ""
 postcopy_preempt_enabled(bool value) "%d"
 postcopy_preempt_enabled(bool value) "%d"
+migration_precopy_complete(void) ""
 
 
 # migration-stats
 # migration-stats
 migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd, uint64_t rdma) "qemu_file %" PRIu64 " multifd %" PRIu64 " RDMA %" PRIu64
 migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd, uint64_t rdma) "qemu_file %" PRIu64 " multifd %" PRIu64 " RDMA %" PRIu64
@@ -342,6 +344,15 @@ colo_receive_message(const char *msg) "Receive '%s' message"
 # colo-failover.c
 # colo-failover.c
 colo_failover_set_state(const char *new_state) "new state %s"
 colo_failover_set_state(const char *new_state) "new state %s"
 
 
+# cpr.c
+cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d"
+cpr_delete_fd(const char *name, int id) "%s, id %d"
+cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d"
+cpr_state_save(const char *mode) "%s mode"
+cpr_state_load(const char *mode) "%s mode"
+cpr_transfer_input(const char *path) "%s"
+cpr_transfer_output(const char *path) "%s"
+
 # block-dirty-bitmap.c
 # block-dirty-bitmap.c
 send_bitmap_header_enter(void) ""
 send_bitmap_header_enter(void) ""
 send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64
 send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64

+ 24 - 0
migration/vmstate-types.c

@@ -15,6 +15,7 @@
 #include "qemu-file.h"
 #include "qemu-file.h"
 #include "migration.h"
 #include "migration.h"
 #include "migration/vmstate.h"
 #include "migration/vmstate.h"
+#include "migration/client-options.h"
 #include "qemu/error-report.h"
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/queue.h"
 #include "trace.h"
 #include "trace.h"
@@ -314,6 +315,29 @@ const VMStateInfo vmstate_info_uint64 = {
     .put  = put_uint64,
     .put  = put_uint64,
 };
 };
 
 
+/* File descriptor communicated via SCM_RIGHTS */
+
+static int get_fd(QEMUFile *f, void *pv, size_t size,
+                  const VMStateField *field)
+{
+    int32_t *v = pv;
+    *v = qemu_file_get_fd(f);
+    return 0;
+}
+
+static int put_fd(QEMUFile *f, void *pv, size_t size,
+                  const VMStateField *field, JSONWriter *vmdesc)
+{
+    int32_t *v = pv;
+    return qemu_file_put_fd(f, *v);
+}
+
+const VMStateInfo vmstate_info_fd = {
+    .name = "fd",
+    .get  = get_fd,
+    .put  = put_fd,
+};
+
 static int get_nullptr(QEMUFile *f, void *pv, size_t size,
 static int get_nullptr(QEMUFile *f, void *pv, size_t size,
                        const VMStateField *field)
                        const VMStateField *field)
 
 

+ 4 - 2
migration/vmstate.c

@@ -459,6 +459,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
                 }
                 }
 
 
                 /*
                 /*
+                 * This logic only matters when dumping VM Desc.
+                 *
                  * Due to the fake nullptr handling above, if there's mixed
                  * Due to the fake nullptr handling above, if there's mixed
                  * null/non-null data, it doesn't make sense to emit a
                  * null/non-null data, it doesn't make sense to emit a
                  * compressed array representation spanning the entire array
                  * compressed array representation spanning the entire array
@@ -466,7 +468,7 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
                  * vs. nullptr). Search ahead for the next null/non-null element
                  * vs. nullptr). Search ahead for the next null/non-null element
                  * and start a new compressed array if found.
                  * and start a new compressed array if found.
                  */
                  */
-                if (field->flags & VMS_ARRAY_OF_POINTER &&
+                if (vmdesc && (field->flags & VMS_ARRAY_OF_POINTER) &&
                     is_null != is_prev_null) {
                     is_null != is_prev_null) {
 
 
                     is_prev_null = is_null;
                     is_prev_null = is_null;
@@ -504,7 +506,7 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
                                     written_bytes);
                                     written_bytes);
 
 
                 /* If we used a fake temp field.. free it now */
                 /* If we used a fake temp field.. free it now */
-                if (inner_field != field) {
+                if (is_null) {
                     g_clear_pointer((gpointer *)&inner_field, g_free);
                     g_clear_pointer((gpointer *)&inner_field, g_free);
                 }
                 }
 
 

+ 47 - 4
qapi/migration.json

@@ -158,8 +158,11 @@
 #
 #
 # @pre-switchover: Paused before device serialisation.  (since 2.11)
 # @pre-switchover: Paused before device serialisation.  (since 2.11)
 #
 #
-# @device: During device serialisation when pause-before-switchover is
-#     enabled (since 2.11)
+# @device: During device serialisation (also known as switchover phase).
+#     Up to and including 9.2, this state is only used (1) in precopy,
+#     and (2) when the pause-before-switchover capability is enabled.
+#     Since 10.0, this state is always present in every migration
+#     procedure as the switchover phase.  (since 2.11)
 #
 #
 # @wait-unplug: wait for device unplug request by guest OS to be
 # @wait-unplug: wait for device unplug request by guest OS to be
 #     completed.  (since 4.2)
 #     completed.  (since 4.2)
@@ -614,9 +617,48 @@
 #     or COLO.
 #     or COLO.
 #
 #
 #     (since 8.2)
 #     (since 8.2)
+#
+# @cpr-transfer: This mode allows the user to transfer a guest to a
+#     new QEMU instance on the same host with minimal guest pause
+#     time by preserving guest RAM in place.  Devices and their pinned
+#     pages will also be preserved in a future QEMU release.
+#
+#     The user starts new QEMU on the same host as old QEMU, with
+#     command-line arguments to create the same machine, plus the
+#     -incoming option for the main migration channel, like normal
+#     live migration.  In addition, the user adds a second -incoming
+#     option with channel type "cpr".  This CPR channel must support
+#     file descriptor transfer with SCM_RIGHTS, i.e. it must be a
+#     UNIX domain socket.
+#
+#     To initiate CPR, the user issues a migrate command to old QEMU,
+#     adding a second migration channel of type "cpr" in the channels
+#     argument.  Old QEMU stops the VM, saves state to the migration
+#     channels, and enters the postmigrate state.  Execution resumes
+#     in new QEMU.
+#
+#     New QEMU reads the CPR channel before opening a monitor, hence
+#     the CPR channel cannot be specified in the list of channels for
+#     a migrate-incoming command.  It may only be specified on the
+#     command line.
+#
+#     The main channel address cannot be a file type, and for an
+#     inet socket, the port cannot be 0 (meaning dynamically choose
+#     a port).
+#
+#     Memory-backend objects must have the share=on attribute, but
+#     memory-backend-epc is not supported.  The VM must be started
+#     with the '-machine aux-ram-share=on' option.
+#
+#     When using -incoming defer, you must issue the migrate command
+#     to old QEMU before issuing any monitor commands to new QEMU.
+#     However, new QEMU does not open and read the migration stream
+#     until you issue the migrate-incoming command.
+#
+#     (since 10.0)
 ##
 ##
 { 'enum': 'MigMode',
 { 'enum': 'MigMode',
-  'data': [ 'normal', 'cpr-reboot' ] }
+  'data': [ 'normal', 'cpr-reboot', 'cpr-transfer' ] }
 
 
 ##
 ##
 # @ZeroPageDetection:
 # @ZeroPageDetection:
@@ -1578,11 +1620,12 @@
 # The migration channel-type request options.
 # The migration channel-type request options.
 #
 #
 # @main: Main outbound migration channel.
 # @main: Main outbound migration channel.
+# @cpr: Checkpoint and restart state channel.  (since 10.0)
 #
 #
 # Since: 8.1
 # Since: 8.1
 ##
 ##
 { 'enum': 'MigrationChannelType',
 { 'enum': 'MigrationChannelType',
-  'data': [ 'main' ] }
+  'data': [ 'main', 'cpr' ] }
 
 
 ##
 ##
 # @MigrationChannel:
 # @MigrationChannel:

+ 34 - 0
qemu-options.hx

@@ -38,6 +38,9 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "                nvdimm=on|off controls NVDIMM support (default=off)\n"
     "                nvdimm=on|off controls NVDIMM support (default=off)\n"
     "                memory-encryption=@var{} memory encryption object to use (default=none)\n"
     "                memory-encryption=@var{} memory encryption object to use (default=none)\n"
     "                hmat=on|off controls ACPI HMAT support (default=off)\n"
     "                hmat=on|off controls ACPI HMAT support (default=off)\n"
+#ifdef CONFIG_POSIX
+    "                aux-ram-share=on|off allocate auxiliary guest RAM as shared (default: off)\n"
+#endif
     "                memory-backend='backend-id' specifies explicitly provided backend for main RAM (default=none)\n"
     "                memory-backend='backend-id' specifies explicitly provided backend for main RAM (default=none)\n"
     "                cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
     "                cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
     QEMU_ARCH_ALL)
     QEMU_ARCH_ALL)
@@ -101,6 +104,16 @@ SRST
         Enables or disables ACPI Heterogeneous Memory Attribute Table
         Enables or disables ACPI Heterogeneous Memory Attribute Table
         (HMAT) support. The default is off.
         (HMAT) support. The default is off.
 
 
+    ``aux-ram-share=on|off``
+        Allocate auxiliary guest RAM as an anonymous file that is
+        shareable with an external process.  This option applies to
+        memory allocated as a side effect of creating various devices.
+        It does not apply to memory-backend objects, whether explicitly
+        specified on the command line, or implicitly created by the -m
+        command line option.  The default is off.
+
+        To use the cpr-transfer migration mode, you must set aux-ram-share=on.
+
     ``memory-backend='id'``
     ``memory-backend='id'``
         An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
         An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
         Allows to use a memory backend as main RAM.
         Allows to use a memory backend as main RAM.
@@ -4929,10 +4942,18 @@ DEF("incoming", HAS_ARG, QEMU_OPTION_incoming, \
     "-incoming exec:cmdline\n" \
     "-incoming exec:cmdline\n" \
     "                accept incoming migration on given file descriptor\n" \
     "                accept incoming migration on given file descriptor\n" \
     "                or from given external command\n" \
     "                or from given external command\n" \
+    "-incoming <channel>\n" \
+    "                accept incoming migration on the migration channel\n" \
     "-incoming defer\n" \
     "-incoming defer\n" \
     "                wait for the URI to be specified via migrate_incoming\n",
     "                wait for the URI to be specified via migrate_incoming\n",
     QEMU_ARCH_ALL)
     QEMU_ARCH_ALL)
 SRST
 SRST
+The -incoming option specifies the migration channel for an incoming
+migration.  It may be used multiple times to specify multiple
+migration channel types.  The channel type is specified in <channel>,
+or is 'main' for all other forms of -incoming.  If multiple -incoming
+options are specified for a channel type, the last one takes precedence.
+
 ``-incoming tcp:[host]:port[,to=maxport][,ipv4=on|off][,ipv6=on|off]``
 ``-incoming tcp:[host]:port[,to=maxport][,ipv4=on|off][,ipv6=on|off]``
   \ 
   \ 
 ``-incoming rdma:host:port[,ipv4=on|off][,ipv6=on|off]``
 ``-incoming rdma:host:port[,ipv4=on|off][,ipv6=on|off]``
@@ -4952,6 +4973,19 @@ SRST
     Accept incoming migration as an output from specified external
     Accept incoming migration as an output from specified external
     command.
     command.
 
 
+``-incoming <channel>``
+    Accept incoming migration on the migration channel.  For the syntax
+    of <channel>, see the QAPI documentation of ``MigrationChannel``.
+    Examples:
+    ::
+
+        -incoming '{"channel-type": "main",
+                    "addr": { "transport": "socket",
+                              "type": "unix",
+                              "path": "my.sock" }}'
+
+        -incoming main,addr.transport=socket,addr.type=unix,addr.path=my.sock
+
 ``-incoming defer``
 ``-incoming defer``
     Wait for the URI to be specified via migrate\_incoming. The monitor
     Wait for the URI to be specified via migrate\_incoming. The monitor
     can be used to change settings (such as migration parameters) prior
     can be used to change settings (such as migration parameters) prior

+ 7 - 0
stubs/vmstate.c

@@ -1,5 +1,7 @@
 #include "qemu/osdep.h"
 #include "qemu/osdep.h"
 #include "migration/vmstate.h"
 #include "migration/vmstate.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/client-options.h"
 
 
 int vmstate_register_with_alias_id(VMStateIf *obj,
 int vmstate_register_with_alias_id(VMStateIf *obj,
                                    uint32_t instance_id,
                                    uint32_t instance_id,
@@ -21,3 +23,8 @@ bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
 {
 {
     return true;
     return true;
 }
 }
+
+MigMode migrate_mode(void)
+{
+    return MIG_MODE_NORMAL;
+}

+ 2 - 2
system/memory.c

@@ -1680,8 +1680,8 @@ bool memory_region_init_ram_from_fd(MemoryRegion *mr,
     mr->readonly = !!(ram_flags & RAM_READONLY);
     mr->readonly = !!(ram_flags & RAM_READONLY);
     mr->terminates = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->destructor = memory_region_destructor_ram;
-    mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
-                                           &err);
+    mr->ram_block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd,
+                                           offset, false, &err);
     if (err) {
     if (err) {
         mr->size = int128_zero();
         mr->size = int128_zero();
         object_unparent(OBJECT(mr));
         object_unparent(OBJECT(mr));

+ 129 - 21
system/physmem.c

@@ -48,6 +48,7 @@
 #include "qemu/qemu-print.h"
 #include "qemu/qemu-print.h"
 #include "qemu/log.h"
 #include "qemu/log.h"
 #include "qemu/memalign.h"
 #include "qemu/memalign.h"
+#include "qemu/memfd.h"
 #include "exec/memory.h"
 #include "exec/memory.h"
 #include "exec/ioport.h"
 #include "exec/ioport.h"
 #include "system/dma.h"
 #include "system/dma.h"
@@ -69,6 +70,7 @@
 
 
 #include "qemu/pmem.h"
 #include "qemu/pmem.h"
 
 
+#include "migration/cpr.h"
 #include "migration/vmstate.h"
 #include "migration/vmstate.h"
 
 
 #include "qemu/range.h"
 #include "qemu/range.h"
@@ -1660,6 +1662,18 @@ void qemu_ram_unset_idstr(RAMBlock *block)
     }
     }
 }
 }
 
 
+static char *cpr_name(MemoryRegion *mr)
+{
+    const char *mr_name = memory_region_name(mr);
+    g_autofree char *id = mr->dev ? qdev_get_dev_path(mr->dev) : NULL;
+
+    if (id) {
+        return g_strdup_printf("%s/%s", id, mr_name);
+    } else {
+        return g_strdup(mr_name);
+    }
+}
+
 size_t qemu_ram_pagesize(RAMBlock *rb)
 size_t qemu_ram_pagesize(RAMBlock *rb)
 {
 {
     return rb->page_size;
     return rb->page_size;
@@ -1942,18 +1956,27 @@ out_free:
 }
 }
 
 
 #ifdef CONFIG_POSIX
 #ifdef CONFIG_POSIX
-RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
+                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd, off_t offset,
                                  uint32_t ram_flags, int fd, off_t offset,
+                                 bool grow,
                                  Error **errp)
                                  Error **errp)
 {
 {
+    ERRP_GUARD();
     RAMBlock *new_block;
     RAMBlock *new_block;
     Error *local_err = NULL;
     Error *local_err = NULL;
-    int64_t file_size, file_align;
+    int64_t file_size, file_align, share_flags;
+
+    share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED);
+    assert(share_flags != (RAM_SHARED | RAM_PRIVATE));
+    ram_flags &= ~RAM_PRIVATE;
 
 
     /* Just support these ram flags by now. */
     /* Just support these ram flags by now. */
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
                           RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
-                          RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
+                          RAM_READONLY_FD | RAM_GUEST_MEMFD |
+                          RAM_RESIZEABLE)) == 0);
+    assert(max_size >= size);
 
 
     if (xen_enabled()) {
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");
         error_setg(errp, "-mem-path not supported with Xen");
@@ -1968,12 +1991,16 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
 
 
     size = TARGET_PAGE_ALIGN(size);
     size = TARGET_PAGE_ALIGN(size);
     size = REAL_HOST_PAGE_ALIGN(size);
     size = REAL_HOST_PAGE_ALIGN(size);
+    max_size = TARGET_PAGE_ALIGN(max_size);
+    max_size = REAL_HOST_PAGE_ALIGN(max_size);
 
 
     file_size = get_file_size(fd);
     file_size = get_file_size(fd);
-    if (file_size > offset && file_size < (offset + size)) {
-        error_setg(errp, "backing store size 0x%" PRIx64
-                   " does not match 'size' option 0x" RAM_ADDR_FMT,
-                   file_size, size);
+    if (file_size && file_size < offset + max_size && !grow) {
+        error_setg(errp, "%s backing store size 0x%" PRIx64
+                   " is too small for 'size' option 0x" RAM_ADDR_FMT
+                   " plus 'offset' option 0x%" PRIx64,
+                   memory_region_name(mr), file_size, max_size,
+                   (uint64_t)offset);
         return NULL;
         return NULL;
     }
     }
 
 
@@ -1988,11 +2015,13 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     new_block = g_malloc0(sizeof(*new_block));
     new_block = g_malloc0(sizeof(*new_block));
     new_block->mr = mr;
     new_block->mr = mr;
     new_block->used_length = size;
     new_block->used_length = size;
-    new_block->max_length = size;
+    new_block->max_length = max_size;
+    new_block->resized = resized;
     new_block->flags = ram_flags;
     new_block->flags = ram_flags;
     new_block->guest_memfd = -1;
     new_block->guest_memfd = -1;
-    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
-                                     errp);
+    new_block->host = file_ram_alloc(new_block, max_size, fd,
+                                     file_size < offset + max_size,
+                                     offset, errp);
     if (!new_block->host) {
     if (!new_block->host) {
         g_free(new_block);
         g_free(new_block);
         return NULL;
         return NULL;
@@ -2044,7 +2073,8 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
         return NULL;
         return NULL;
     }
     }
 
 
-    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
+    block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd, offset,
+                                   false, errp);
     if (!block) {
     if (!block) {
         if (created) {
         if (created) {
             unlink(mem_path);
             unlink(mem_path);
@@ -2057,21 +2087,97 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
 }
 }
 #endif
 #endif
 
 
+#ifdef CONFIG_POSIX
+/*
+ * Create MAP_SHARED RAMBlocks by mmap'ing a file descriptor, so it can be
+ * shared with another process if CPR is being used.  Use memfd if available
+ * because it has no size limits, else use POSIX shm.
+ */
+static int qemu_ram_get_shared_fd(const char *name, bool *reused, Error **errp)
+{
+    int fd = cpr_find_fd(name, 0);
+
+    if (fd >= 0) {
+        *reused = true;
+        return fd;
+    }
+
+    if (qemu_memfd_check(0)) {
+        fd = qemu_memfd_create(name, 0, 0, 0, 0, errp);
+    } else {
+        fd = qemu_shm_alloc(0, errp);
+    }
+
+    if (fd >= 0) {
+        cpr_save_fd(name, 0, fd);
+    }
+    *reused = false;
+    return fd;
+}
+#endif
+
 static
 static
 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
-                                  void (*resized)(const char*,
-                                                  uint64_t length,
-                                                  void *host),
+                                  qemu_ram_resize_cb resized,
                                   void *host, uint32_t ram_flags,
                                   void *host, uint32_t ram_flags,
                                   MemoryRegion *mr, Error **errp)
                                   MemoryRegion *mr, Error **errp)
 {
 {
     RAMBlock *new_block;
     RAMBlock *new_block;
     Error *local_err = NULL;
     Error *local_err = NULL;
-    int align;
+    int align, share_flags;
+
+    share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED);
+    assert(share_flags != (RAM_SHARED | RAM_PRIVATE));
+    ram_flags &= ~RAM_PRIVATE;
 
 
     assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
     assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
                           RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
                           RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
     assert(!host ^ (ram_flags & RAM_PREALLOC));
     assert(!host ^ (ram_flags & RAM_PREALLOC));
+    assert(max_size >= size);
+
+#ifdef CONFIG_POSIX         /* ignore RAM_SHARED for Windows */
+    if (!host) {
+        if (!share_flags && current_machine->aux_ram_share) {
+            ram_flags |= RAM_SHARED;
+        }
+        if (ram_flags & RAM_SHARED) {
+            bool reused;
+            g_autofree char *name = cpr_name(mr);
+            int fd = qemu_ram_get_shared_fd(name, &reused, errp);
+
+            if (fd < 0) {
+                return NULL;
+            }
+
+            /* Use same alignment as qemu_anon_ram_alloc */
+            mr->align = QEMU_VMALLOC_ALIGN;
+
+            /*
+             * This can fail if the shm mount size is too small, or alloc from
+             * fd is not supported, but previous QEMU versions that called
+             * qemu_anon_ram_alloc for anonymous shared memory could have
+             * succeeded.  Quietly fail and fall back.
+             *
+             * After cpr-transfer, new QEMU could create a memory region
+             * with a larger max size than old, so pass reused to grow the
+             * region if necessary.  The extra space will be usable after a
+             * guest reset.
+             */
+            new_block = qemu_ram_alloc_from_fd(size, max_size, resized, mr,
+                                               ram_flags, fd, 0, reused, NULL);
+            if (new_block) {
+                trace_qemu_ram_alloc_shared(name, new_block->used_length,
+                                            new_block->max_length, fd,
+                                            new_block->host);
+                return new_block;
+            }
+
+            cpr_delete_fd(name, 0);
+            close(fd);
+            /* fall back to anon allocation */
+        }
+    }
+#endif
 
 
     align = qemu_real_host_page_size();
     align = qemu_real_host_page_size();
     align = MAX(align, TARGET_PAGE_SIZE);
     align = MAX(align, TARGET_PAGE_SIZE);
@@ -2083,7 +2189,6 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
     new_block->resized = resized;
     new_block->resized = resized;
     new_block->used_length = size;
     new_block->used_length = size;
     new_block->max_length = max_size;
     new_block->max_length = max_size;
-    assert(max_size >= size);
     new_block->fd = -1;
     new_block->fd = -1;
     new_block->guest_memfd = -1;
     new_block->guest_memfd = -1;
     new_block->page_size = qemu_real_host_page_size();
     new_block->page_size = qemu_real_host_page_size();
@@ -2108,15 +2213,14 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
                          MemoryRegion *mr, Error **errp)
                          MemoryRegion *mr, Error **errp)
 {
 {
-    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
+    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD |
+                          RAM_PRIVATE)) == 0);
     return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
     return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
 }
 }
 
 
 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
-                                     void (*resized)(const char*,
-                                                     uint64_t length,
-                                                     void *host),
-                                     MemoryRegion *mr, Error **errp)
+                                    qemu_ram_resize_cb resized,
+                                    MemoryRegion *mr, Error **errp)
 {
 {
     return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
     return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
                                    RAM_RESIZEABLE, mr, errp);
                                    RAM_RESIZEABLE, mr, errp);
@@ -2147,6 +2251,8 @@ static void reclaim_ramblock(RAMBlock *block)
 
 
 void qemu_ram_free(RAMBlock *block)
 void qemu_ram_free(RAMBlock *block)
 {
 {
+    g_autofree char *name = NULL;
+
     if (!block) {
     if (!block) {
         return;
         return;
     }
     }
@@ -2157,6 +2263,8 @@ void qemu_ram_free(RAMBlock *block)
     }
     }
 
 
     qemu_mutex_lock_ramlist();
     qemu_mutex_lock_ramlist();
+    name = cpr_name(block->mr);
+    cpr_delete_fd(name, 0);
     QLIST_REMOVE_RCU(block, next);
     QLIST_REMOVE_RCU(block, next);
     ram_list.mru_block = NULL;
     ram_list.mru_block = NULL;
     /* Write list before version */
     /* Write list before version */

+ 1 - 0
system/trace-events

@@ -33,6 +33,7 @@ address_space_map(void *as, uint64_t addr, uint64_t len, bool is_write, uint32_t
 find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64
 find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64
 find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64
 find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64
 ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d"
 ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d"
+qemu_ram_alloc_shared(const char *name, size_t size, size_t max_size, int fd, void *host) "%s size %zu max_size %zu fd %d host %p"
 
 
 # cpus.c
 # cpus.c
 vm_stop_flush_all(int ret) "ret %d"
 vm_stop_flush_all(int ret) "ret %d"

+ 40 - 3
system/vl.c

@@ -77,6 +77,7 @@
 #include "hw/block/block.h"
 #include "hw/block/block.h"
 #include "hw/i386/x86.h"
 #include "hw/i386/x86.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/pc.h"
+#include "migration/cpr.h"
 #include "migration/misc.h"
 #include "migration/misc.h"
 #include "migration/snapshot.h"
 #include "migration/snapshot.h"
 #include "system/tpm.h"
 #include "system/tpm.h"
@@ -123,6 +124,7 @@
 #include "qapi/qapi-visit-block-core.h"
 #include "qapi/qapi-visit-block-core.h"
 #include "qapi/qapi-visit-compat.h"
 #include "qapi/qapi-visit-compat.h"
 #include "qapi/qapi-visit-machine.h"
 #include "qapi/qapi-visit-machine.h"
+#include "qapi/qapi-visit-migration.h"
 #include "qapi/qapi-visit-ui.h"
 #include "qapi/qapi-visit-ui.h"
 #include "qapi/qapi-commands-block-core.h"
 #include "qapi/qapi-commands-block-core.h"
 #include "qapi/qapi-commands-migration.h"
 #include "qapi/qapi-commands-migration.h"
@@ -159,6 +161,8 @@ typedef struct DeviceOption {
 static const char *cpu_option;
 static const char *cpu_option;
 static const char *mem_path;
 static const char *mem_path;
 static const char *incoming;
 static const char *incoming;
+static const char *incoming_str[MIGRATION_CHANNEL_TYPE__MAX];
+static MigrationChannel *incoming_channels[MIGRATION_CHANNEL_TYPE__MAX];
 static const char *loadvm;
 static const char *loadvm;
 static const char *accelerators;
 static const char *accelerators;
 static bool have_custom_ram_size;
 static bool have_custom_ram_size;
@@ -1813,6 +1817,30 @@ static void object_option_add_visitor(Visitor *v)
     QTAILQ_INSERT_TAIL(&object_opts, opt, next);
     QTAILQ_INSERT_TAIL(&object_opts, opt, next);
 }
 }
 
 
+static void incoming_option_parse(const char *str)
+{
+    MigrationChannelType type = MIGRATION_CHANNEL_TYPE_MAIN;
+    MigrationChannel *channel;
+    Visitor *v;
+
+    if (!strcmp(str, "defer")) {
+        channel = NULL;
+    } else if (migrate_is_uri(str)) {
+        migrate_uri_parse(str, &channel, &error_fatal);
+    } else {
+        v = qobject_input_visitor_new_str(str, "channel-type", &error_fatal);
+        visit_type_MigrationChannel(v, NULL, &channel, &error_fatal);
+        visit_free(v);
+        type = channel->channel_type;
+    }
+
+    /* New incoming spec replaces the previous */
+    qapi_free_MigrationChannel(incoming_channels[type]);
+    incoming_channels[type] = channel;
+    incoming_str[type] = str;
+    incoming = incoming_str[MIGRATION_CHANNEL_TYPE_MAIN];
+}
+
 static void object_option_parse(const char *str)
 static void object_option_parse(const char *str)
 {
 {
     QemuOpts *opts;
     QemuOpts *opts;
@@ -2738,8 +2766,11 @@ void qmp_x_exit_preconfig(Error **errp)
     if (incoming) {
     if (incoming) {
         Error *local_err = NULL;
         Error *local_err = NULL;
         if (strcmp(incoming, "defer") != 0) {
         if (strcmp(incoming, "defer") != 0) {
-            qmp_migrate_incoming(incoming, false, NULL, true, true,
-                                 &local_err);
+            g_autofree MigrationChannelList *channels =
+                g_new0(MigrationChannelList, 1);
+
+            channels->value = incoming_channels[MIGRATION_CHANNEL_TYPE_MAIN];
+            qmp_migrate_incoming(NULL, true, channels, true, true, &local_err);
             if (local_err) {
             if (local_err) {
                 error_reportf_err(local_err, "-incoming %s: ", incoming);
                 error_reportf_err(local_err, "-incoming %s: ", incoming);
                 exit(1);
                 exit(1);
@@ -3458,7 +3489,7 @@ void qemu_init(int argc, char **argv)
                 if (!incoming) {
                 if (!incoming) {
                     runstate_set(RUN_STATE_INMIGRATE);
                     runstate_set(RUN_STATE_INMIGRATE);
                 }
                 }
-                incoming = optarg;
+                incoming_option_parse(optarg);
                 break;
                 break;
             case QEMU_OPTION_only_migratable:
             case QEMU_OPTION_only_migratable:
                 only_migratable = 1;
                 only_migratable = 1;
@@ -3676,6 +3707,12 @@ void qemu_init(int argc, char **argv)
 
 
     qemu_create_machine(machine_opts_dict);
     qemu_create_machine(machine_opts_dict);
 
 
+    /*
+     * Load incoming CPR state before any devices are created, because it
+     * contains file descriptors that are needed in device initialization code.
+     */
+    cpr_state_load(incoming_channels[MIGRATION_CHANNEL_TYPE_CPR], &error_fatal);
+
     suspend_mux_open();
     suspend_mux_open();
 
 
     qemu_disable_default_devices();
     qemu_disable_default_devices();

+ 1 - 0
tests/qemu-iotests/194.out

@@ -14,6 +14,7 @@ Starting migration...
 {"return": {}}
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 Gracefully ending the `drive-mirror` job on source...
 Gracefully ending the `drive-mirror` job on source...
 {"return": {}}
 {"return": {}}

+ 1 - 0
tests/qemu-iotests/203.out

@@ -8,4 +8,5 @@ Starting migration...
 {"return": {}}
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}

+ 2 - 0
tests/qemu-iotests/234.out

@@ -10,6 +10,7 @@ Starting migration to B...
 {"return": {}}
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
@@ -27,6 +28,7 @@ Starting migration back to A...
 {"return": {}}
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}

+ 1 - 0
tests/qemu-iotests/262.out

@@ -8,6 +8,7 @@ Starting migration to B...
 {"return": {}}
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}

+ 1 - 0
tests/qemu-iotests/280.out

@@ -7,6 +7,7 @@ Enabling migration QMP events on VM...
 {"return": {}}
 {"return": {}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"status": "device"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 
 
 VM is now stopped:
 VM is now stopped:

+ 2 - 1
tests/qtest/libqos/libqos.c

@@ -117,13 +117,14 @@ void migrate(QOSState *from, QOSState *to, const char *uri)
         g_assert(qdict_haskey(sub, "status"));
         g_assert(qdict_haskey(sub, "status"));
         st = qdict_get_str(sub, "status");
         st = qdict_get_str(sub, "status");
 
 
-        /* "setup", "active", "completed", "failed", "cancelled" */
+        /* "setup", "active", "device", "completed", "failed", "cancelled" */
         if (strcmp(st, "completed") == 0) {
         if (strcmp(st, "completed") == 0) {
             qobject_unref(rsp);
             qobject_unref(rsp);
             break;
             break;
         }
         }
 
 
         if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)
         if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)
+            || (strcmp(st, "device") == 0)
             || (strcmp(st, "wait-unplug") == 0)) {
             || (strcmp(st, "wait-unplug") == 0)) {
             qobject_unref(rsp);
             qobject_unref(rsp);
             g_usleep(5000);
             g_usleep(5000);

+ 68 - 35
tests/qtest/libqtest.c

@@ -75,6 +75,8 @@ struct QTestState
 {
 {
     int fd;
     int fd;
     int qmp_fd;
     int qmp_fd;
+    int sock;
+    int qmpsock;
     pid_t qemu_pid;  /* our child QEMU process */
     pid_t qemu_pid;  /* our child QEMU process */
     int wstatus;
     int wstatus;
 #ifdef _WIN32
 #ifdef _WIN32
@@ -458,18 +460,19 @@ static QTestState *G_GNUC_PRINTF(2, 3) qtest_spawn_qemu(const char *qemu_bin,
     return s;
     return s;
 }
 }
 
 
+static char *qtest_socket_path(const char *suffix)
+{
+    return g_strdup_printf("%s/qtest-%d.%s", g_get_tmp_dir(), getpid(), suffix);
+}
+
 static QTestState *qtest_init_internal(const char *qemu_bin,
 static QTestState *qtest_init_internal(const char *qemu_bin,
-                                       const char *extra_args)
+                                       const char *extra_args,
+                                       bool do_connect)
 {
 {
     QTestState *s;
     QTestState *s;
     int sock, qmpsock, i;
     int sock, qmpsock, i;
-    gchar *socket_path;
-    gchar *qmp_socket_path;
-
-    socket_path = g_strdup_printf("%s/qtest-%d.sock",
-                                  g_get_tmp_dir(), getpid());
-    qmp_socket_path = g_strdup_printf("%s/qtest-%d.qmp",
-                                      g_get_tmp_dir(), getpid());
+    g_autofree gchar *socket_path = qtest_socket_path("sock");
+    g_autofree gchar *qmp_socket_path = qtest_socket_path("qmp");
 
 
     /*
     /*
      * It's possible that if an earlier test run crashed it might
      * It's possible that if an earlier test run crashed it might
@@ -501,22 +504,19 @@ static QTestState *qtest_init_internal(const char *qemu_bin,
     qtest_client_set_rx_handler(s, qtest_client_socket_recv_line);
     qtest_client_set_rx_handler(s, qtest_client_socket_recv_line);
     qtest_client_set_tx_handler(s, qtest_client_socket_send);
     qtest_client_set_tx_handler(s, qtest_client_socket_send);
 
 
-    s->fd = socket_accept(sock);
-    if (s->fd >= 0) {
-        s->qmp_fd = socket_accept(qmpsock);
-    }
-    unlink(socket_path);
-    unlink(qmp_socket_path);
-    g_free(socket_path);
-    g_free(qmp_socket_path);
-
-    g_assert(s->fd >= 0 && s->qmp_fd >= 0);
-
     s->rx = g_string_new("");
     s->rx = g_string_new("");
     for (i = 0; i < MAX_IRQ; i++) {
     for (i = 0; i < MAX_IRQ; i++) {
         s->irq_level[i] = false;
         s->irq_level[i] = false;
     }
     }
 
 
+    s->fd = -1;
+    s->qmp_fd = -1;
+    s->sock = sock;
+    s->qmpsock = qmpsock;
+    if (do_connect) {
+        qtest_connect(s);
+    }
+
     /*
     /*
      * Stopping QEMU for debugging is not supported on Windows.
      * Stopping QEMU for debugging is not supported on Windows.
      *
      *
@@ -531,28 +531,38 @@ static QTestState *qtest_init_internal(const char *qemu_bin,
     }
     }
 #endif
 #endif
 
 
-    /* ask endianness of the target */
+   return s;
+}
 
 
-    s->big_endian = qtest_query_target_endianness(s);
+void qtest_connect(QTestState *s)
+{
+    g_autofree gchar *socket_path = qtest_socket_path("sock");
+    g_autofree gchar *qmp_socket_path = qtest_socket_path("qmp");
 
 
-   return s;
+    g_assert(s->sock >= 0 && s->qmpsock >= 0);
+    s->fd = socket_accept(s->sock);
+    if (s->fd >= 0) {
+        s->qmp_fd = socket_accept(s->qmpsock);
+    }
+    unlink(socket_path);
+    unlink(qmp_socket_path);
+    g_assert(s->fd >= 0 && s->qmp_fd >= 0);
+    s->sock = s->qmpsock = -1;
+    /* ask endianness of the target */
+    s->big_endian = qtest_query_target_endianness(s);
 }
 }
 
 
 QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
 QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
 {
 {
-    return qtest_init_internal(qtest_qemu_binary(NULL), extra_args);
+    return qtest_init_internal(qtest_qemu_binary(NULL), extra_args, true);
 }
 }
 
 
-QTestState *qtest_init_with_env_and_capabilities(const char *var,
-                                                 const char *extra_args,
-                                                 QList *capabilities)
+void qtest_qmp_handshake(QTestState *s, QList *capabilities)
 {
 {
-    QTestState *s = qtest_init_internal(qtest_qemu_binary(var), extra_args);
-    QDict *greeting;
-
     /* Read the QMP greeting and then do the handshake */
     /* Read the QMP greeting and then do the handshake */
-    greeting = qtest_qmp_receive(s);
+    QDict *greeting = qtest_qmp_receive(s);
     qobject_unref(greeting);
     qobject_unref(greeting);
+
     if (capabilities) {
     if (capabilities) {
         qtest_qmp_assert_success(s,
         qtest_qmp_assert_success(s,
                                  "{ 'execute': 'qmp_capabilities', "
                                  "{ 'execute': 'qmp_capabilities', "
@@ -561,18 +571,37 @@ QTestState *qtest_init_with_env_and_capabilities(const char *var,
     } else {
     } else {
         qtest_qmp_assert_success(s, "{ 'execute': 'qmp_capabilities' }");
         qtest_qmp_assert_success(s, "{ 'execute': 'qmp_capabilities' }");
     }
     }
+}
+
+QTestState *qtest_init_with_env_and_capabilities(const char *var,
+                                                 const char *extra_args,
+                                                 QList *capabilities,
+                                                 bool do_connect)
+{
+    QTestState *s = qtest_init_internal(qtest_qemu_binary(var), extra_args,
+                                        do_connect);
 
 
+    if (do_connect) {
+        qtest_qmp_handshake(s, capabilities);
+    } else {
+        /*
+         * If the connection is delayed, the capabilities must be set
+         * at that moment.
+         */
+        assert(!capabilities);
+    }
     return s;
     return s;
 }
 }
 
 
-QTestState *qtest_init_with_env(const char *var, const char *extra_args)
+QTestState *qtest_init_with_env(const char *var, const char *extra_args,
+                                bool do_connect)
 {
 {
-    return qtest_init_with_env_and_capabilities(var, extra_args, NULL);
+    return qtest_init_with_env_and_capabilities(var, extra_args, NULL, true);
 }
 }
 
 
 QTestState *qtest_init(const char *extra_args)
 QTestState *qtest_init(const char *extra_args)
 {
 {
-    return qtest_init_with_env(NULL, extra_args);
+    return qtest_init_with_env(NULL, extra_args, true);
 }
 }
 
 
 QTestState *qtest_vinitf(const char *fmt, va_list ap)
 QTestState *qtest_vinitf(const char *fmt, va_list ap)
@@ -782,6 +811,7 @@ QDict *qtest_qmp_receive(QTestState *s)
 
 
 QDict *qtest_qmp_receive_dict(QTestState *s)
 QDict *qtest_qmp_receive_dict(QTestState *s)
 {
 {
+    g_assert(s->qmp_fd >= 0);
     return qmp_fd_receive(s->qmp_fd);
     return qmp_fd_receive(s->qmp_fd);
 }
 }
 
 
@@ -809,12 +839,14 @@ int qtest_socket_server(const char *socket_path)
 void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
 void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
                          const char *fmt, va_list ap)
                          const char *fmt, va_list ap)
 {
 {
+    g_assert(s->qmp_fd >= 0);
     qmp_fd_vsend_fds(s->qmp_fd, fds, fds_num, fmt, ap);
     qmp_fd_vsend_fds(s->qmp_fd, fds, fds_num, fmt, ap);
 }
 }
 #endif
 #endif
 
 
 void qtest_qmp_vsend(QTestState *s, const char *fmt, va_list ap)
 void qtest_qmp_vsend(QTestState *s, const char *fmt, va_list ap)
 {
 {
+    g_assert(s->qmp_fd >= 0);
     qmp_fd_vsend(s->qmp_fd, fmt, ap);
     qmp_fd_vsend(s->qmp_fd, fmt, ap);
 }
 }
 
 
@@ -875,6 +907,7 @@ void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
 {
 {
     va_list ap;
     va_list ap;
 
 
+    g_assert(s->qmp_fd >= 0);
     va_start(ap, fmt);
     va_start(ap, fmt);
     qmp_fd_vsend_raw(s->qmp_fd, fmt, ap);
     qmp_fd_vsend_raw(s->qmp_fd, fmt, ap);
     va_end(ap);
     va_end(ap);
@@ -1580,7 +1613,7 @@ static struct MachInfo *qtest_get_machines(const char *var)
 
 
     silence_spawn_log = !g_test_verbose();
     silence_spawn_log = !g_test_verbose();
 
 
-    qts = qtest_init_with_env(qemu_var, "-machine none");
+    qts = qtest_init_with_env(qemu_var, "-machine none", true);
     response = qtest_qmp(qts, "{ 'execute': 'query-machines' }");
     response = qtest_qmp(qts, "{ 'execute': 'query-machines' }");
     g_assert(response);
     g_assert(response);
     list = qdict_get_qlist(response, "return");
     list = qdict_get_qlist(response, "return");
@@ -1635,7 +1668,7 @@ static struct CpuModel *qtest_get_cpu_models(void)
 
 
     silence_spawn_log = !g_test_verbose();
     silence_spawn_log = !g_test_verbose();
 
 
-    qts = qtest_init_with_env(NULL, "-machine none");
+    qts = qtest_init_with_env(NULL, "-machine none", true);
     response = qtest_qmp(qts, "{ 'execute': 'query-cpu-definitions' }");
     response = qtest_qmp(qts, "{ 'execute': 'query-cpu-definitions' }");
     g_assert(response);
     g_assert(response);
     list = qdict_get_qlist(response, "return");
     list = qdict_get_qlist(response, "return");

+ 22 - 2
tests/qtest/libqtest.h

@@ -61,13 +61,15 @@ QTestState *qtest_init(const char *extra_args);
  * @var: Environment variable from where to take the QEMU binary
  * @var: Environment variable from where to take the QEMU binary
  * @extra_args: Other arguments to pass to QEMU.  CAUTION: these
  * @extra_args: Other arguments to pass to QEMU.  CAUTION: these
  * arguments are subject to word splitting and shell evaluation.
  * arguments are subject to word splitting and shell evaluation.
+ * @do_connect: connect to qemu monitor and qtest socket.
  *
  *
  * Like qtest_init(), but use a different environment variable for the
  * Like qtest_init(), but use a different environment variable for the
  * QEMU binary.
  * QEMU binary.
  *
  *
  * Returns: #QTestState instance.
  * Returns: #QTestState instance.
  */
  */
-QTestState *qtest_init_with_env(const char *var, const char *extra_args);
+QTestState *qtest_init_with_env(const char *var, const char *extra_args,
+                                bool do_connect);
 
 
 /**
 /**
  * qtest_init_with_env_and_capabilities:
  * qtest_init_with_env_and_capabilities:
@@ -75,6 +77,7 @@ QTestState *qtest_init_with_env(const char *var, const char *extra_args);
  * @extra_args: Other arguments to pass to QEMU.  CAUTION: these
  * @extra_args: Other arguments to pass to QEMU.  CAUTION: these
  * arguments are subject to word splitting and shell evaluation.
  * arguments are subject to word splitting and shell evaluation.
  * @capabilities: list of QMP capabilities (strings) to enable
  * @capabilities: list of QMP capabilities (strings) to enable
+ * @do_connect: connect to qemu monitor and qtest socket.
  *
  *
  * Like qtest_init_with_env(), but enable specified capabilities during
  * Like qtest_init_with_env(), but enable specified capabilities during
  * handshake.
  * handshake.
@@ -83,7 +86,8 @@ QTestState *qtest_init_with_env(const char *var, const char *extra_args);
  */
  */
 QTestState *qtest_init_with_env_and_capabilities(const char *var,
 QTestState *qtest_init_with_env_and_capabilities(const char *var,
                                                  const char *extra_args,
                                                  const char *extra_args,
-                                                 QList *capabilities);
+                                                 QList *capabilities,
+                                                 bool do_connect);
 
 
 /**
 /**
  * qtest_init_without_qmp_handshake:
  * qtest_init_without_qmp_handshake:
@@ -94,6 +98,22 @@ QTestState *qtest_init_with_env_and_capabilities(const char *var,
  */
  */
 QTestState *qtest_init_without_qmp_handshake(const char *extra_args);
 QTestState *qtest_init_without_qmp_handshake(const char *extra_args);
 
 
+/**
+ * qtest_connect:
+ * @s: #QTestState instance to connect
+ * Connect to qemu monitor and qtest socket, after skipping them in
+ * qtest_init_with_env.  Does not handshake with the monitor.
+ */
+void qtest_connect(QTestState *s);
+
+/**
+ * qtest_qmp_handshake:
+ * @s: #QTestState instance to operate on.
+ * @capabilities: list of QMP capabilities (strings) to enable
+ * Perform handshake after connecting to qemu monitor.
+ */
+void qtest_qmp_handshake(QTestState *s, QList *capabilities);
+
 /**
 /**
  * qtest_init_with_serial:
  * qtest_init_with_serial:
  * @extra_args: other arguments to pass to QEMU.  CAUTION: these
  * @extra_args: other arguments to pass to QEMU.  CAUTION: these

+ 62 - 0
tests/qtest/migration/cpr-tests.c

@@ -44,6 +44,62 @@ static void test_mode_reboot(void)
     test_file_common(&args, true);
     test_file_common(&args, true);
 }
 }
 
 
+static void *test_mode_transfer_start(QTestState *from, QTestState *to)
+{
+    migrate_set_parameter_str(from, "mode", "cpr-transfer");
+    return NULL;
+}
+
+/*
+ * cpr-transfer mode cannot use the target monitor prior to starting the
+ * migration, and cannot connect synchronously to the monitor, so defer
+ * the target connection.
+ */
+static void test_mode_transfer_common(bool incoming_defer)
+{
+    g_autofree char *cpr_path = g_strdup_printf("%s/cpr.sock", tmpfs);
+    g_autofree char *mig_path = g_strdup_printf("%s/migsocket", tmpfs);
+    g_autofree char *uri = g_strdup_printf("unix:%s", mig_path);
+
+    const char *opts = "-machine aux-ram-share=on -nodefaults";
+    g_autofree const char *cpr_channel = g_strdup_printf(
+        "cpr,addr.transport=socket,addr.type=unix,addr.path=%s",
+        cpr_path);
+    g_autofree char *opts_target = g_strdup_printf("-incoming %s %s",
+                                                   cpr_channel, opts);
+
+    g_autofree char *connect_channels = g_strdup_printf(
+        "[ { 'channel-type': 'main',"
+        "    'addr': { 'transport': 'socket',"
+        "              'type': 'unix',"
+        "              'path': '%s' } } ]",
+        mig_path);
+
+    MigrateCommon args = {
+        .start.opts_source = opts,
+        .start.opts_target = opts_target,
+        .start.defer_target_connect = true,
+        .start.memory_backend = "-object memory-backend-memfd,id=pc.ram,size=%s"
+                                " -machine memory-backend=pc.ram",
+        .listen_uri = incoming_defer ? "defer" : uri,
+        .connect_channels = connect_channels,
+        .cpr_channel = cpr_channel,
+        .start_hook = test_mode_transfer_start,
+    };
+
+    test_precopy_common(&args);
+}
+
+static void test_mode_transfer(void)
+{
+    test_mode_transfer_common(NULL);
+}
+
+static void test_mode_transfer_defer(void)
+{
+    test_mode_transfer_common(true);
+}
+
 void migration_test_add_cpr(MigrationTestEnv *env)
 void migration_test_add_cpr(MigrationTestEnv *env)
 {
 {
     tmpfs = env->tmpfs;
     tmpfs = env->tmpfs;
@@ -55,4 +111,10 @@ void migration_test_add_cpr(MigrationTestEnv *env)
     if (getenv("QEMU_TEST_FLAKY_TESTS")) {
     if (getenv("QEMU_TEST_FLAKY_TESTS")) {
         migration_test_add("/migration/mode/reboot", test_mode_reboot);
         migration_test_add("/migration/mode/reboot", test_mode_reboot);
     }
     }
+
+    if (env->has_kvm) {
+        migration_test_add("/migration/mode/transfer", test_mode_transfer);
+        migration_test_add("/migration/mode/transfer/defer",
+                           test_mode_transfer_defer);
+    }
 }
 }

+ 67 - 13
tests/qtest/migration/framework.c

@@ -18,6 +18,8 @@
 #include "migration/migration-qmp.h"
 #include "migration/migration-qmp.h"
 #include "migration/migration-util.h"
 #include "migration/migration-util.h"
 #include "ppc-util.h"
 #include "ppc-util.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qlist.h"
 #include "qemu/module.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
 #include "qemu/option.h"
@@ -196,9 +198,10 @@ static void cleanup(const char *filename)
 
 
 static QList *migrate_start_get_qmp_capabilities(const MigrateStart *args)
 static QList *migrate_start_get_qmp_capabilities(const MigrateStart *args)
 {
 {
-    QList *capabilities = qlist_new();
+    QList *capabilities = NULL;
 
 
     if (args->oob) {
     if (args->oob) {
+        capabilities = qlist_new();
         qlist_append_str(capabilities, "oob");
         qlist_append_str(capabilities, "oob");
     }
     }
     return capabilities;
     return capabilities;
@@ -221,6 +224,8 @@ int migrate_start(QTestState **from, QTestState **to, const char *uri,
     g_autofree char *machine = NULL;
     g_autofree char *machine = NULL;
     const char *bootpath;
     const char *bootpath;
     g_autoptr(QList) capabilities = migrate_start_get_qmp_capabilities(args);
     g_autoptr(QList) capabilities = migrate_start_get_qmp_capabilities(args);
+    g_autofree char *memory_backend = NULL;
+    const char *events;
 
 
     if (args->use_shmem) {
     if (args->use_shmem) {
         if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) {
         if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) {
@@ -296,6 +301,12 @@ int migrate_start(QTestState **from, QTestState **to, const char *uri,
             memory_size, shmem_path);
             memory_size, shmem_path);
     }
     }
 
 
+    if (args->memory_backend) {
+        memory_backend = g_strdup_printf(args->memory_backend, memory_size);
+    } else {
+        memory_backend = g_strdup_printf("-m %s ", memory_size);
+    }
+
     if (args->use_dirty_ring) {
     if (args->use_dirty_ring) {
         kvm_opts = ",dirty-ring-size=4096";
         kvm_opts = ",dirty-ring-size=4096";
     }
     }
@@ -314,40 +325,48 @@ int migrate_start(QTestState **from, QTestState **to, const char *uri,
     cmd_source = g_strdup_printf("-accel kvm%s -accel tcg "
     cmd_source = g_strdup_printf("-accel kvm%s -accel tcg "
                                  "-machine %s,%s "
                                  "-machine %s,%s "
                                  "-name source,debug-threads=on "
                                  "-name source,debug-threads=on "
-                                 "-m %s "
+                                 "%s "
                                  "-serial file:%s/src_serial "
                                  "-serial file:%s/src_serial "
                                  "%s %s %s %s",
                                  "%s %s %s %s",
                                  kvm_opts ? kvm_opts : "",
                                  kvm_opts ? kvm_opts : "",
                                  machine, machine_opts,
                                  machine, machine_opts,
-                                 memory_size, tmpfs,
+                                 memory_backend, tmpfs,
                                  arch_opts ? arch_opts : "",
                                  arch_opts ? arch_opts : "",
                                  shmem_opts ? shmem_opts : "",
                                  shmem_opts ? shmem_opts : "",
                                  args->opts_source ? args->opts_source : "",
                                  args->opts_source ? args->opts_source : "",
                                  ignore_stderr);
                                  ignore_stderr);
     if (!args->only_target) {
     if (!args->only_target) {
         *from = qtest_init_with_env_and_capabilities(QEMU_ENV_SRC, cmd_source,
         *from = qtest_init_with_env_and_capabilities(QEMU_ENV_SRC, cmd_source,
-                                                     capabilities);
+                                                     capabilities, true);
         qtest_qmp_set_event_callback(*from,
         qtest_qmp_set_event_callback(*from,
                                      migrate_watch_for_events,
                                      migrate_watch_for_events,
                                      &src_state);
                                      &src_state);
     }
     }
 
 
+    /*
+     * If the monitor connection is deferred, enable events on the command line
+     * so none are missed.  This is for testing only, do not set migration
+     * options like this in general.
+     */
+    events = args->defer_target_connect ? "-global migration.x-events=on" : "";
+
     cmd_target = g_strdup_printf("-accel kvm%s -accel tcg "
     cmd_target = g_strdup_printf("-accel kvm%s -accel tcg "
                                  "-machine %s,%s "
                                  "-machine %s,%s "
                                  "-name target,debug-threads=on "
                                  "-name target,debug-threads=on "
-                                 "-m %s "
+                                 "%s "
                                  "-serial file:%s/dest_serial "
                                  "-serial file:%s/dest_serial "
                                  "-incoming %s "
                                  "-incoming %s "
-                                 "%s %s %s %s",
+                                 "%s %s %s %s %s",
                                  kvm_opts ? kvm_opts : "",
                                  kvm_opts ? kvm_opts : "",
                                  machine, machine_opts,
                                  machine, machine_opts,
-                                 memory_size, tmpfs, uri,
+                                 memory_backend, tmpfs, uri,
+                                 events,
                                  arch_opts ? arch_opts : "",
                                  arch_opts ? arch_opts : "",
                                  shmem_opts ? shmem_opts : "",
                                  shmem_opts ? shmem_opts : "",
                                  args->opts_target ? args->opts_target : "",
                                  args->opts_target ? args->opts_target : "",
                                  ignore_stderr);
                                  ignore_stderr);
     *to = qtest_init_with_env_and_capabilities(QEMU_ENV_DST, cmd_target,
     *to = qtest_init_with_env_and_capabilities(QEMU_ENV_DST, cmd_target,
-                                               capabilities);
+                                               capabilities, !args->defer_target_connect);
     qtest_qmp_set_event_callback(*to,
     qtest_qmp_set_event_callback(*to,
                                  migrate_watch_for_events,
                                  migrate_watch_for_events,
                                  &dst_state);
                                  &dst_state);
@@ -365,7 +384,9 @@ int migrate_start(QTestState **from, QTestState **to, const char *uri,
      * to mimic that as closely as possible.
      * to mimic that as closely as possible.
      */
      */
     migrate_set_capability(*from, "events", true);
     migrate_set_capability(*from, "events", true);
-    migrate_set_capability(*to, "events", true);
+    if (!args->defer_target_connect) {
+        migrate_set_capability(*to, "events", true);
+    }
 
 
     return 0;
     return 0;
 }
 }
@@ -399,6 +420,7 @@ void migrate_end(QTestState *from, QTestState *to, bool test_dest)
     qtest_quit(to);
     qtest_quit(to);
 
 
     cleanup("migsocket");
     cleanup("migsocket");
+    cleanup("cpr.sock");
     cleanup("src_serial");
     cleanup("src_serial");
     cleanup("dest_serial");
     cleanup("dest_serial");
     cleanup(FILE_TEST_FILENAME);
     cleanup(FILE_TEST_FILENAME);
@@ -686,6 +708,10 @@ void test_precopy_common(MigrateCommon *args)
 {
 {
     QTestState *from, *to;
     QTestState *from, *to;
     void *data_hook = NULL;
     void *data_hook = NULL;
+    QObject *in_channels = NULL;
+    QObject *out_channels = NULL;
+
+    g_assert(!args->cpr_channel || args->connect_channels);
 
 
     if (migrate_start(&from, &to, args->listen_uri, &args->start)) {
     if (migrate_start(&from, &to, args->listen_uri, &args->start)) {
         return;
         return;
@@ -718,12 +744,40 @@ void test_precopy_common(MigrateCommon *args)
         }
         }
     }
     }
 
 
+    /*
+     * The cpr channel must be included in outgoing channels, but not in
+     * migrate-incoming channels.
+     */
+    if (args->connect_channels) {
+        if (args->start.defer_target_connect &&
+            !strcmp(args->listen_uri, "defer")) {
+            in_channels = qobject_from_json(args->connect_channels,
+                                            &error_abort);
+        }
+        out_channels = qobject_from_json(args->connect_channels, &error_abort);
+
+        if (args->cpr_channel) {
+            QList *channels_list = qobject_to(QList, out_channels);
+            QObject *obj = migrate_str_to_channel(args->cpr_channel);
+
+            qlist_append(channels_list, obj);
+        }
+    }
+
     if (args->result == MIG_TEST_QMP_ERROR) {
     if (args->result == MIG_TEST_QMP_ERROR) {
-        migrate_qmp_fail(from, args->connect_uri, args->connect_channels, "{}");
+        migrate_qmp_fail(from, args->connect_uri, out_channels, "{}");
         goto finish;
         goto finish;
     }
     }
 
 
-    migrate_qmp(from, to, args->connect_uri, args->connect_channels, "{}");
+    migrate_qmp(from, to, args->connect_uri, out_channels, "{}");
+
+    if (args->start.defer_target_connect) {
+        qtest_connect(to);
+        qtest_qmp_handshake(to, NULL);
+        if (!strcmp(args->listen_uri, "defer")) {
+            migrate_incoming_qmp(to, args->connect_uri, in_channels, "{}");
+        }
+    }
 
 
     if (args->result != MIG_TEST_SUCCEED) {
     if (args->result != MIG_TEST_SUCCEED) {
         bool allow_active = args->result == MIG_TEST_FAIL;
         bool allow_active = args->result == MIG_TEST_FAIL;
@@ -868,7 +922,7 @@ void test_file_common(MigrateCommon *args, bool stop_src)
      * We need to wait for the source to finish before starting the
      * We need to wait for the source to finish before starting the
      * destination.
      * destination.
      */
      */
-    migrate_incoming_qmp(to, args->connect_uri, "{}");
+    migrate_incoming_qmp(to, args->connect_uri, NULL, "{}");
     wait_for_migration_complete(to);
     wait_for_migration_complete(to);
 
 
     if (stop_src) {
     if (stop_src) {
@@ -904,7 +958,7 @@ void *migrate_hook_start_precopy_tcp_multifd_common(QTestState *from,
     migrate_set_capability(to, "multifd", true);
     migrate_set_capability(to, "multifd", true);
 
 
     /* Start incoming migration from the 1st socket */
     /* Start incoming migration from the 1st socket */
-    migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}");
+    migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}");
 
 
     return NULL;
     return NULL;
 }
 }

+ 11 - 0
tests/qtest/migration/framework.h

@@ -111,6 +111,14 @@ typedef struct {
     bool suspend_me;
     bool suspend_me;
     /* enable OOB QMP capability */
     /* enable OOB QMP capability */
     bool oob;
     bool oob;
+    /*
+     * Format string for the main memory backend, containing one %s where the
+     * size is plugged in.  If omitted, "-m %s" is used.
+     */
+    const char *memory_backend;
+
+    /* Do not connect to target monitor and qtest sockets in qtest_init */
+    bool defer_target_connect;
 } MigrateStart;
 } MigrateStart;
 
 
 typedef enum PostcopyRecoveryFailStage {
 typedef enum PostcopyRecoveryFailStage {
@@ -146,6 +154,9 @@ typedef struct {
      */
      */
     const char *connect_channels;
     const char *connect_channels;
 
 
+    /* Optional: the cpr migration channel, in JSON or dotted keys format */
+    const char *cpr_channel;
+
     /* Optional: callback to run at start to set migration parameters */
     /* Optional: callback to run at start to set migration parameters */
     TestMigrateStartHook start_hook;
     TestMigrateStartHook start_hook;
     /* Optional: callback to run at finish to cleanup */
     /* Optional: callback to run at finish to cleanup */

+ 44 - 9
tests/qtest/migration/migration-qmp.c

@@ -15,9 +15,13 @@
 #include "migration-qmp.h"
 #include "migration-qmp.h"
 #include "migration-util.h"
 #include "migration-util.h"
 #include "qapi/error.h"
 #include "qapi/error.h"
+#include "qapi/qapi-types-migration.h"
+#include "qapi/qapi-visit-migration.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qlist.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 
 
 /*
 /*
  * Number of seconds we wait when looking for migration
  * Number of seconds we wait when looking for migration
@@ -47,8 +51,33 @@ void migration_event_wait(QTestState *s, const char *target)
     } while (!found);
     } while (!found);
 }
 }
 
 
+/*
+ * Convert a string representing a single channel to an object.
+ * @str may be in JSON or dotted keys format.
+ */
+QObject *migrate_str_to_channel(const char *str)
+{
+    Visitor *v;
+    MigrationChannel *channel;
+    QObject *obj;
+
+    /* Create the channel */
+    v = qobject_input_visitor_new_str(str, "channel-type", &error_abort);
+    visit_type_MigrationChannel(v, NULL, &channel, &error_abort);
+    visit_free(v);
+
+    /* Create the object */
+    v = qobject_output_visitor_new(&obj);
+    visit_type_MigrationChannel(v, NULL, &channel, &error_abort);
+    visit_complete(v, &obj);
+    visit_free(v);
+
+    qapi_free_MigrationChannel(channel);
+    return obj;
+}
+
 void migrate_qmp_fail(QTestState *who, const char *uri,
 void migrate_qmp_fail(QTestState *who, const char *uri,
-                      const char *channels, const char *fmt, ...)
+                      QObject *channels, const char *fmt, ...)
 {
 {
     va_list ap;
     va_list ap;
     QDict *args, *err;
     QDict *args, *err;
@@ -64,8 +93,7 @@ void migrate_qmp_fail(QTestState *who, const char *uri,
 
 
     g_assert(!qdict_haskey(args, "channels"));
     g_assert(!qdict_haskey(args, "channels"));
     if (channels) {
     if (channels) {
-        QObject *channels_obj = qobject_from_json(channels, &error_abort);
-        qdict_put_obj(args, "channels", channels_obj);
+        qdict_put_obj(args, "channels", channels);
     }
     }
 
 
     err = qtest_qmp_assert_failure_ref(
     err = qtest_qmp_assert_failure_ref(
@@ -82,7 +110,7 @@ void migrate_qmp_fail(QTestState *who, const char *uri,
  * qobject_from_jsonf_nofail()) with "uri": @uri spliced in.
  * qobject_from_jsonf_nofail()) with "uri": @uri spliced in.
  */
  */
 void migrate_qmp(QTestState *who, QTestState *to, const char *uri,
 void migrate_qmp(QTestState *who, QTestState *to, const char *uri,
-                 const char *channels, const char *fmt, ...)
+                 QObject *channels, const char *fmt, ...)
 {
 {
     va_list ap;
     va_list ap;
     QDict *args;
     QDict *args;
@@ -102,10 +130,9 @@ void migrate_qmp(QTestState *who, QTestState *to, const char *uri,
 
 
     g_assert(!qdict_haskey(args, "channels"));
     g_assert(!qdict_haskey(args, "channels"));
     if (channels) {
     if (channels) {
-        QObject *channels_obj = qobject_from_json(channels, &error_abort);
-        QList *channel_list = qobject_to(QList, channels_obj);
+        QList *channel_list = qobject_to(QList, channels);
         migrate_set_ports(to, channel_list);
         migrate_set_ports(to, channel_list);
-        qdict_put_obj(args, "channels", channels_obj);
+        qdict_put_obj(args, "channels", channels);
     }
     }
 
 
     qtest_qmp_assert_success(who,
     qtest_qmp_assert_success(who,
@@ -123,7 +150,8 @@ void migrate_set_capability(QTestState *who, const char *capability,
                              capability, value);
                              capability, value);
 }
 }
 
 
-void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
+void migrate_incoming_qmp(QTestState *to, const char *uri, QObject *channels,
+                          const char *fmt, ...)
 {
 {
     va_list ap;
     va_list ap;
     QDict *args, *rsp;
     QDict *args, *rsp;
@@ -133,7 +161,14 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
     va_end(ap);
     va_end(ap);
 
 
     g_assert(!qdict_haskey(args, "uri"));
     g_assert(!qdict_haskey(args, "uri"));
-    qdict_put_str(args, "uri", uri);
+    if (uri) {
+        qdict_put_str(args, "uri", uri);
+    }
+
+    g_assert(!qdict_haskey(args, "channels"));
+    if (channels) {
+        qdict_put_obj(args, "channels", channels);
+    }
 
 
     /* This function relies on the event to work, make sure it's enabled */
     /* This function relies on the event to work, make sure it's enabled */
     migrate_set_capability(to, "events", true);
     migrate_set_capability(to, "events", true);

+ 6 - 4
tests/qtest/migration/migration-qmp.h

@@ -4,17 +4,19 @@
 
 
 #include "migration-util.h"
 #include "migration-util.h"
 
 
+QObject *migrate_str_to_channel(const char *str);
+
 G_GNUC_PRINTF(4, 5)
 G_GNUC_PRINTF(4, 5)
 void migrate_qmp_fail(QTestState *who, const char *uri,
 void migrate_qmp_fail(QTestState *who, const char *uri,
-                      const char *channels, const char *fmt, ...);
+                      QObject *channels, const char *fmt, ...);
 
 
 G_GNUC_PRINTF(5, 6)
 G_GNUC_PRINTF(5, 6)
 void migrate_qmp(QTestState *who, QTestState *to, const char *uri,
 void migrate_qmp(QTestState *who, QTestState *to, const char *uri,
-                 const char *channels, const char *fmt, ...);
+                 QObject *channels, const char *fmt, ...);
 
 
-G_GNUC_PRINTF(3, 4)
+G_GNUC_PRINTF(4, 5)
 void migrate_incoming_qmp(QTestState *who, const char *uri,
 void migrate_incoming_qmp(QTestState *who, const char *uri,
-                          const char *fmt, ...);
+                          QObject *channels, const char *fmt, ...);
 
 
 void migration_event_wait(QTestState *s, const char *target);
 void migration_event_wait(QTestState *s, const char *target);
 void migrate_set_capability(QTestState *who, const char *capability,
 void migrate_set_capability(QTestState *who, const char *capability,

+ 15 - 8
tests/qtest/migration/migration-util.c

@@ -135,25 +135,32 @@ migrate_get_connect_qdict(QTestState *who)
 
 
 void migrate_set_ports(QTestState *to, QList *channel_list)
 void migrate_set_ports(QTestState *to, QList *channel_list)
 {
 {
-    QDict *addr;
+    g_autoptr(QDict) addr = NULL;
     QListEntry *entry;
     QListEntry *entry;
     const char *addr_port = NULL;
     const char *addr_port = NULL;
 
 
-    addr = migrate_get_connect_qdict(to);
-
     QLIST_FOREACH_ENTRY(channel_list, entry) {
     QLIST_FOREACH_ENTRY(channel_list, entry) {
         QDict *channel = qobject_to(QDict, qlist_entry_obj(entry));
         QDict *channel = qobject_to(QDict, qlist_entry_obj(entry));
         QDict *addrdict = qdict_get_qdict(channel, "addr");
         QDict *addrdict = qdict_get_qdict(channel, "addr");
 
 
-        if (qdict_haskey(addrdict, "port") &&
-            qdict_haskey(addr, "port") &&
-            (strcmp(qdict_get_str(addrdict, "port"), "0") == 0)) {
+        if (!qdict_haskey(addrdict, "port") ||
+            strcmp(qdict_get_str(addrdict, "port"), "0")) {
+            continue;
+        }
+
+        /*
+         * Fetch addr only if needed, so tests that are not yet connected to
+         * the monitor do not query it.  Such tests cannot use port=0.
+         */
+        if (!addr) {
+            addr = migrate_get_connect_qdict(to);
+        }
+
+        if (qdict_haskey(addr, "port")) {
             addr_port = qdict_get_str(addr, "port");
             addr_port = qdict_get_str(addr, "port");
             qdict_put_str(addrdict, "port", addr_port);
             qdict_put_str(addrdict, "port", addr_port);
         }
         }
     }
     }
-
-    qobject_unref(addr);
 }
 }
 
 
 bool migrate_watch_for_events(QTestState *who, const char *name,
 bool migrate_watch_for_events(QTestState *who, const char *name,

+ 8 - 1
tests/qtest/migration/misc-tests.c

@@ -11,6 +11,8 @@
  */
  */
 
 
 #include "qemu/osdep.h"
 #include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qjson.h"
 #include "libqtest.h"
 #include "libqtest.h"
 #include "migration/framework.h"
 #include "migration/framework.h"
 #include "migration/migration-qmp.h"
 #include "migration/migration-qmp.h"
@@ -205,6 +207,7 @@ static void test_validate_uuid_dst_not_set(void)
 static void do_test_validate_uri_channel(MigrateCommon *args)
 static void do_test_validate_uri_channel(MigrateCommon *args)
 {
 {
     QTestState *from, *to;
     QTestState *from, *to;
+    QObject *channels;
 
 
     if (migrate_start(&from, &to, args->listen_uri, &args->start)) {
     if (migrate_start(&from, &to, args->listen_uri, &args->start)) {
         return;
         return;
@@ -217,7 +220,11 @@ static void do_test_validate_uri_channel(MigrateCommon *args)
      * 'uri' and 'channels' validation is checked even before the migration
      * 'uri' and 'channels' validation is checked even before the migration
      * starts.
      * starts.
      */
      */
-    migrate_qmp_fail(from, args->connect_uri, args->connect_channels, "{}");
+    channels = args->connect_channels ?
+               qobject_from_json(args->connect_channels, &error_abort) :
+               NULL;
+    migrate_qmp_fail(from, args->connect_uri, channels, "{}");
+
     migrate_end(from, to, false);
     migrate_end(from, to, false);
 }
 }
 
 

+ 3 - 3
tests/qtest/migration/precopy-tests.c

@@ -152,7 +152,7 @@ static void *migrate_hook_start_fd(QTestState *from,
     close(pair[0]);
     close(pair[0]);
 
 
     /* Start incoming migration from the 1st socket */
     /* Start incoming migration from the 1st socket */
-    migrate_incoming_qmp(to, "fd:fd-mig", "{}");
+    migrate_incoming_qmp(to, "fd:fd-mig", NULL, "{}");
 
 
     /* Send the 2nd socket to the target */
     /* Send the 2nd socket to the target */
     qtest_qmp_fds_assert_success(from, &pair[1], 1,
     qtest_qmp_fds_assert_success(from, &pair[1], 1,
@@ -479,7 +479,7 @@ static void test_multifd_tcp_cancel(void)
     migrate_set_capability(to, "multifd", true);
     migrate_set_capability(to, "multifd", true);
 
 
     /* Start incoming migration from the 1st socket */
     /* Start incoming migration from the 1st socket */
-    migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}");
+    migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}");
 
 
     /* Wait for the first serial output from the source */
     /* Wait for the first serial output from the source */
     wait_for_serial("src_serial");
     wait_for_serial("src_serial");
@@ -518,7 +518,7 @@ static void test_multifd_tcp_cancel(void)
     migrate_set_capability(to2, "multifd", true);
     migrate_set_capability(to2, "multifd", true);
 
 
     /* Start incoming migration from the 1st socket */
     /* Start incoming migration from the 1st socket */
-    migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", "{}");
+    migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", NULL, "{}");
 
 
     migrate_ensure_non_converge(from);
     migrate_ensure_non_converge(from);
 
 

+ 4 - 4
tests/qtest/virtio-net-failover.c

@@ -773,7 +773,7 @@ static void test_migrate_in(gconstpointer opaque)
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
 
 
-    migrate_incoming_qmp(qts, uri, "{}");
+    migrate_incoming_qmp(qts, uri, NULL, "{}");
 
 
     resp = get_failover_negociated_event(qts);
     resp = get_failover_negociated_event(qts);
     g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0");
     g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0");
@@ -895,7 +895,7 @@ static void test_off_migrate_in(gconstpointer opaque)
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "primary0", MAC_PRIMARY0);
     check_one_card(qts, true, "primary0", MAC_PRIMARY0);
 
 
-    migrate_incoming_qmp(qts, uri, "{}");
+    migrate_incoming_qmp(qts, uri, NULL, "{}");
 
 
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "primary0", MAC_PRIMARY0);
     check_one_card(qts, true, "primary0", MAC_PRIMARY0);
@@ -1022,7 +1022,7 @@ static void test_guest_off_migrate_in(gconstpointer opaque)
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
 
 
-    migrate_incoming_qmp(qts, uri, "{}");
+    migrate_incoming_qmp(qts, uri, NULL, "{}");
 
 
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, true, "standby0", MAC_STANDBY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
     check_one_card(qts, false, "primary0", MAC_PRIMARY0);
@@ -1747,7 +1747,7 @@ static void test_multi_in(gconstpointer opaque)
     check_one_card(qts, true, "standby1", MAC_STANDBY1);
     check_one_card(qts, true, "standby1", MAC_STANDBY1);
     check_one_card(qts, false, "primary1", MAC_PRIMARY1);
     check_one_card(qts, false, "primary1", MAC_PRIMARY1);
 
 
-    migrate_incoming_qmp(qts, uri, "{}");
+    migrate_incoming_qmp(qts, uri, NULL, "{}");
 
 
     resp = get_failover_negociated_event(qts);
     resp = get_failover_negociated_event(qts);
     g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0");
     g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0");

+ 13 - 3
util/memfd.c

@@ -194,17 +194,27 @@ bool qemu_memfd_alloc_check(void)
 /**
 /**
  * qemu_memfd_check():
  * qemu_memfd_check():
  *
  *
- * Check if host supports memfd.
+ * Check if host supports memfd.  Cache the answer for the common case flags=0.
  */
  */
 bool qemu_memfd_check(unsigned int flags)
 bool qemu_memfd_check(unsigned int flags)
 {
 {
 #ifdef CONFIG_LINUX
 #ifdef CONFIG_LINUX
-    int mfd = memfd_create("test", flags | MFD_CLOEXEC);
+    int mfd;
+    static int memfd_check = MEMFD_TODO;
+
+    if (!flags && memfd_check != MEMFD_TODO) {
+        return memfd_check;
+    }
 
 
+    mfd = memfd_create("test", flags | MFD_CLOEXEC);
     if (mfd >= 0) {
     if (mfd >= 0) {
         close(mfd);
         close(mfd);
-        return true;
     }
     }
+    if (!flags) {
+        memfd_check = (mfd >= 0) ? MEMFD_OK : MEMFD_KO;
+    }
+    return (mfd >= 0);
+
 #endif
 #endif
 
 
     return false;
     return false;

+ 52 - 0
util/oslib-posix.c

@@ -931,3 +931,55 @@ void qemu_close_all_open_fd(const int *skip, unsigned int nskip)
         qemu_close_all_open_fd_fallback(skip, nskip, open_max);
         qemu_close_all_open_fd_fallback(skip, nskip, open_max);
     }
     }
 }
 }
+
+int qemu_shm_alloc(size_t size, Error **errp)
+{
+    g_autoptr(GString) shm_name = g_string_new(NULL);
+    int fd, oflag, cur_sequence;
+    static int sequence;
+    mode_t mode;
+
+    cur_sequence = qatomic_fetch_inc(&sequence);
+
+    /*
+     * Let's use `mode = 0` because we don't want other processes to open our
+     * memory unless we share the file descriptor with them.
+     */
+    mode = 0;
+    oflag = O_RDWR | O_CREAT | O_EXCL;
+
+    /*
+     * Some operating systems allow creating anonymous POSIX shared memory
+     * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not
+     * defined by POSIX, so let's create a unique name.
+     *
+     * From Linux's shm_open(3) man-page:
+     *   "For portable use, a shared memory object should be identified
+     *   by a name of the form /somename;"
+     */
+    g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%d", getpid(),
+                    cur_sequence);
+
+    fd = shm_open(shm_name->str, oflag, mode);
+    if (fd < 0) {
+        error_setg_errno(errp, errno,
+                         "failed to create POSIX shared memory");
+        return -1;
+    }
+
+    /*
+     * We have the file descriptor, so we no longer need to expose the
+     * POSIX shared memory object. However, it will remain allocated as long as
+     * there are file descriptors pointing to it.
+     */
+    shm_unlink(shm_name->str);
+
+    if (ftruncate(fd, size) == -1) {
+        error_setg_errno(errp, errno,
+                         "failed to resize POSIX shared memory to %zu", size);
+        close(fd);
+        return -1;
+    }
+
+    return fd;
+}

+ 6 - 0
util/oslib-win32.c

@@ -877,3 +877,9 @@ void qemu_win32_map_free(void *ptr, HANDLE h, Error **errp)
     }
     }
     CloseHandle(h);
     CloseHandle(h);
 }
 }
+
+int qemu_shm_alloc(size_t size, Error **errp)
+{
+    error_setg(errp, "Shared memory is not supported.");
+    return -1;
+}