
Merge tag 'pull-vfio-20241024' of https://github.com/legoater/qemu into staging

vfio queue:

* Fixed size reported in vfio_state_pending_exact()
* Added support for PMD or PUD aligned mappings

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmcZ22wACgkQUaNDx8/7
# 7KHU5g/8Cr1487IJQb5cbpLu2Nviu3wjzhbCFFdbl99uLifdc0GK1P6fqDNQ7BVx
# 2vpZgJRXLTxlUSTpreFw4z6TH7/C4HoNiluQV4l0vxqG/Y9q68SJBpT9WENwXUyY
# +2laDmGQbUjDznxIFlmCgZZAssCIJNp0esNE9hvwkQCarZx9m+QQSSkeVHVWNFqX
# +zTd4v076Q9hi53+4e7FlqFKaFoa54IcZe3gz+GjY/IXMqCDNFw9e9xJxML+zSg3
# HZ4/YMQj+EsKX2gm460EYBmt13kd0wdtFzA1MNc7XcSlBlLk/WmezpEzHZRubiLs
# mbUZ68/cweJmrO0WatycWg9JwQ2q9FlKH1Acgun4Fcf8Zov5ovHuYAsWYbdGDbN1
# E7pY/XlUf6b7Vk+yAGTnKKRi6OguTEmVyRRFy/4V8TwvZNycbeOMebKilGQUGfKj
# iLWuzF6NilT4ZGo7sWnlLZWcmrxN57wJh77GlmcqiqguskB8WGdh/SZSVCkkzr3y
# PN3FGSTseNaxalcjECEFnfE8+bUShLei+I6fppTfqLBaLHJ72lRel0Cg07FS8oM4
# 3ev7etH7jFT5xET00DBamDXacgNtLqFqO6XIK3bFTkLmP0FFQi9u+bvy04IyTVCC
# gd9Zg2vhxp0mjuwtelB+i7yD3pmA2LWFkEzoShpkH/h38CnpoyQ=
# =+69I
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 24 Oct 2024 06:30:20 BST
# gpg:                using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full]
# gpg:                 aka "Cédric Le Goater <clg@kaod.org>" [full]
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B  0B60 51A3 43C7 CFFB ECA1

* tag 'pull-vfio-20241024' of https://github.com/legoater/qemu:
  vfio/helpers: Align mmaps
  vfio/helpers: Refactor vfio_region_mmap() error handling
  vfio/migration: Change trace formats from hex to decimal
  vfio/migration: Report only stop-copy size in vfio_state_pending_exact()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 10 months ago
parent
commit
94be8fd692
3 changed files with 52 additions and 27 deletions
  1. hw/vfio/helpers.c      +47 -19
  2. hw/vfio/migration.c     +0 -3
  3. hw/vfio/trace-events    +5 -5

+ 47 - 19
hw/vfio/helpers.c

@@ -27,6 +27,7 @@
 #include "trace.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "qemu/units.h"
 #include "monitor/monitor.h"
 
 /*
@@ -395,7 +396,7 @@ static void vfio_subregion_unmap(VFIORegion *region, int index)
 
 int vfio_region_mmap(VFIORegion *region)
 {
-    int i, prot = 0;
+    int i, ret, prot = 0;
     char *name;
 
     if (!region->mem) {
@@ -406,27 +407,40 @@ int vfio_region_mmap(VFIORegion *region)
     prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
 
     for (i = 0; i < region->nr_mmaps; i++) {
-        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
-                                     MAP_SHARED, region->vbasedev->fd,
-                                     region->fd_offset +
-                                     region->mmaps[i].offset);
-        if (region->mmaps[i].mmap == MAP_FAILED) {
-            int ret = -errno;
+        size_t align = MIN(1ULL << ctz64(region->mmaps[i].size), 1 * GiB);
+        void *map_base, *map_align;
 
-            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
-                                         region->fd_offset +
-                                         region->mmaps[i].offset,
-                                         region->fd_offset +
-                                         region->mmaps[i].offset +
-                                         region->mmaps[i].size - 1, ret);
-
-            region->mmaps[i].mmap = NULL;
+        /*
+         * Align the mmap for more efficient mapping in the kernel.  Ideally
+         * we'd know the PMD and PUD mapping sizes to use as discrete alignment
+         * intervals, but we don't.  As of Linux v6.12, the largest PUD size
+         * supporting huge pfnmap is 1GiB (ARCH_SUPPORTS_PUD_PFNMAP is only set
+         * on x86_64).  Align by power-of-two size, capped at 1GiB.
+         *
+         * NB. qemu_memalign() and friends actually allocate memory, whereas
+         * the region size here can exceed host memory, therefore we manually
+         * create an oversized anonymous mapping and clean it up for alignment.
+         */
+        map_base = mmap(0, region->mmaps[i].size + align, PROT_NONE,
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (map_base == MAP_FAILED) {
+            ret = -errno;
+            goto no_mmap;
+        }
 
-            for (i--; i >= 0; i--) {
-                vfio_subregion_unmap(region, i);
-            }
+        map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
+        munmap(map_base, map_align - map_base);
+        munmap(map_align + region->mmaps[i].size,
+               align - (map_align - map_base));
 
-            return ret;
+        region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
+                                     MAP_SHARED | MAP_FIXED,
+                                     region->vbasedev->fd,
+                                     region->fd_offset +
+                                     region->mmaps[i].offset);
+        if (region->mmaps[i].mmap == MAP_FAILED) {
+            ret = -errno;
+            goto no_mmap;
         }
 
         name = g_strdup_printf("%s mmaps[%d]",
@@ -446,6 +460,20 @@ int vfio_region_mmap(VFIORegion *region)
     }
 
     return 0;
+
+no_mmap:
+    trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
+                                 region->fd_offset + region->mmaps[i].offset,
+                                 region->fd_offset + region->mmaps[i].offset +
+                                 region->mmaps[i].size - 1, ret);
+
+    region->mmaps[i].mmap = NULL;
+
+    for (i--; i >= 0; i--) {
+        vfio_subregion_unmap(region, i);
+    }
+
+    return ret;
 }
 
 void vfio_region_unmap(VFIORegion *region)

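The comment block in the hunk above explains the technique: since the PMD/PUD sizes are not known to userspace, the mapping is aligned to the largest power of two dividing its size, capped at 1 GiB, by reserving an oversized anonymous PROT_NONE region and trimming the slack. The following stand-alone sketch (not QEMU code; the map_aligned() helper and the /dev/zero stand-in for a VFIO region fd are hypothetical) illustrates the same idea on Linux:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define GIB (1024ULL * 1024 * 1024)

/* Map size bytes of fd at offset, at an address aligned to the largest
 * power of two dividing size (capped at 1 GiB): reserve an oversized
 * anonymous PROT_NONE mapping, trim the unaligned head and tail, then
 * map the file over the aligned hole with MAP_FIXED. */
static void *map_aligned(int fd, off_t offset, size_t size, int prot)
{
    size_t align = 1ULL << __builtin_ctzll(size);   /* size must be non-zero */
    if (align > GIB) {
        align = GIB;
    }

    uint8_t *base = mmap(NULL, size + align, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (base == MAP_FAILED) {
        return MAP_FAILED;
    }

    uint8_t *aligned = (uint8_t *)(((uintptr_t)base + align - 1) &
                                   ~(uintptr_t)(align - 1));

    /* Release the slack before and after the aligned window. */
    if (aligned > base) {
        munmap(base, aligned - base);
    }
    munmap(aligned + size, align - (size_t)(aligned - base));

    /* Map the file over the aligned reservation. */
    return mmap(aligned, size, prot, MAP_SHARED | MAP_FIXED, fd, offset);
}

int main(void)
{
    size_t size = 2 * 1024 * 1024;          /* 2 MiB: PMD-sized on x86-64 */
    int fd = open("/dev/zero", O_RDWR);     /* stand-in for a region fd */
    if (fd < 0) {
        perror("open");
        return 1;
    }

    void *p = map_aligned(fd, 0, size, PROT_READ | PROT_WRITE);
    if (p == MAP_FAILED) {
        perror("map_aligned");
        return 1;
    }
    printf("mapped %zu bytes at %p (2 MiB aligned: %s)\n", size, p,
           ((uintptr_t)p & (size - 1)) == 0 ? "yes" : "no");
    munmap(p, size);
    close(fd);
    return 0;
}

As in the patch, the head trim can be a no-op when the reservation is already aligned, and both unmap calls are best-effort cleanup of the scratch reservation rather than error paths.
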
+ 0 - 3
hw/vfio/migration.c

@@ -576,9 +576,6 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
 
     if (vfio_device_state_is_precopy(vbasedev)) {
         vfio_query_precopy_size(migration);
-
-        *must_precopy +=
-            migration->precopy_init_size + migration->precopy_dirty_size;
     }
 
     trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,

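For context on the hunk above ("Report only stop-copy size in vfio_state_pending_exact()"), here is a simplified illustration of the accounting change, with hypothetical field and function names standing in for the VFIO migration state; it is a sketch of the idea, not the QEMU code:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct mig_state {
    uint64_t stop_copy_size;     /* device data left once the VM is stopped */
    uint64_t precopy_init_size;  /* precopy estimates, still refreshed ... */
    uint64_t precopy_dirty_size; /* ... but no longer added to the total  */
    bool precopy_active;
};

static void query_precopy_size(struct mig_state *m)
{
    /* Stand-in for vfio_query_precopy_size(): refresh the precopy
     * init/dirty estimates from the device. */
    (void)m;
}

static void pending_exact(struct mig_state *m, uint64_t *must_precopy)
{
    *must_precopy += m->stop_copy_size;

    if (m->precopy_active) {
        query_precopy_size(m);
        /* Before the patch, this branch also did:
         *   *must_precopy += m->precopy_init_size + m->precopy_dirty_size;
         * That addition is what the hunk above removes, so the exact
         * pending size now reflects only the stop-copy size. */
    }
}

int main(void)
{
    struct mig_state m = { .stop_copy_size = 4096,
                           .precopy_init_size = 1024,
                           .precopy_dirty_size = 512,
                           .precopy_active = true };
    uint64_t must_precopy = 0;

    pending_exact(&m, &must_precopy);
    printf("exact pending: %llu bytes\n", (unsigned long long)must_precopy);
    return 0;
}
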
+ 5 - 5
hw/vfio/trace-events

@@ -151,7 +151,7 @@ vfio_display_edid_write_error(void) ""
 vfio_load_cleanup(const char *name) " (%s)"
 vfio_load_device_config_state(const char *name) " (%s)"
 vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
-vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d"
+vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
 vfio_migration_realize(const char *name) " (%s)"
 vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
 vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
@@ -160,10 +160,10 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d"
 vfio_save_cleanup(const char *name) " (%s)"
 vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
 vfio_save_device_config_state(const char *name) " (%s)"
-vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
-vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64
-vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
-vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
+vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
+vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size %"PRIu64
+vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64
+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy %"PRIu64" postcopy %"PRIu64" stopcopy size %"PRIu64" precopy initial size %"PRIu64" precopy dirty size %"PRIu64
 vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
 vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
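
The trace-events hunk only changes the printf-style format strings used by QEMU's tracing: sizes previously printed with PRIx64 (hex) are now printed with PRIu64 (decimal). A tiny stand-alone illustration of the difference (example value chosen here, not taken from the patch):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t stopcopy_size = 1536 * 1024;  /* 1.5 MiB */

    /* Old trace format: hex, harder to read as a byte count. */
    printf("stopcopy size 0x%" PRIx64 "\n", stopcopy_size); /* 0x180000 */

    /* New trace format: decimal, reads directly as bytes. */
    printf("stopcopy size %" PRIu64 "\n", stopcopy_size);   /* 1572864 */

    return 0;
}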