Browse Source

Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* Fixes for SGX
* force_rcu notifiers

# gpg: Signature made Wed 10 Nov 2021 10:57:48 PM CET
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  sgx: Reset the vEPC regions during VM reboot
  numa: avoid crash with SGX and "info numa"
  accel/tcg: Register a force_rcu notifier
  rcu: Introduce force_rcu notifier
  target/i386: sgx: mark device not user creatable

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Richard Henderson 3 năm trước cách đây
mục cha
commit
1b9fc6d8ba

+ 26 - 0
accel/tcg/tcg-accel-ops-mttcg.c

@@ -28,6 +28,7 @@
 #include "sysemu/tcg.h"
 #include "sysemu/tcg.h"
 #include "sysemu/replay.h"
 #include "sysemu/replay.h"
 #include "qemu/main-loop.h"
 #include "qemu/main-loop.h"
+#include "qemu/notify.h"
 #include "qemu/guest-random.h"
 #include "qemu/guest-random.h"
 #include "exec/exec-all.h"
 #include "exec/exec-all.h"
 #include "hw/boards.h"
 #include "hw/boards.h"
@@ -35,6 +36,26 @@
 #include "tcg-accel-ops.h"
 #include "tcg-accel-ops.h"
 #include "tcg-accel-ops-mttcg.h"
 #include "tcg-accel-ops-mttcg.h"
 
 
+typedef struct MttcgForceRcuNotifier {  /* force-RCU notifier paired with the vCPU it should poke */
+    Notifier notifier;  /* embedded so mttcg_force_rcu() can container_of() back to this struct */
+    CPUState *cpu;      /* target of the async_run_on_cpu() call made by mttcg_force_rcu() */
+} MttcgForceRcuNotifier;
+
+/*
+ * Empty work item for async_run_on_cpu(); it performs no work of its own.
+ * Both arguments are intentionally unused.
+ */
+static void do_nothing(CPUState *cpu, run_on_cpu_data d)
+{
+    (void)cpu;
+    (void)d;
+}
+
+/*
+ * Force-RCU notifier callback for an MTTCG vCPU thread.
+ *
+ * @notify must be the 'notifier' member of a MttcgForceRcuNotifier; the
+ * enclosing structure is recovered from it and a no-op work item is queued
+ * on the vCPU it names.  @data is not used.
+ */
+static void mttcg_force_rcu(Notifier *notify, void *data)
+{
+    MttcgForceRcuNotifier *self =
+        container_of(notify, MttcgForceRcuNotifier, notifier);
+
+    /*
+     * Called with rcu_registry_lock held, using async_run_on_cpu() ensures
+     * that there are no deadlocks.
+     */
+    async_run_on_cpu(self->cpu, do_nothing, RUN_ON_CPU_NULL);
+}
+
 /*
 /*
  * In the multi-threaded case each vCPU has its own thread. The TLS
  * In the multi-threaded case each vCPU has its own thread. The TLS
  * variable current_cpu can be used deep in the code to find the
  * variable current_cpu can be used deep in the code to find the
@@ -43,12 +64,16 @@
 
 
 static void *mttcg_cpu_thread_fn(void *arg)
 static void *mttcg_cpu_thread_fn(void *arg)
 {
 {
+    MttcgForceRcuNotifier force_rcu;
     CPUState *cpu = arg;
     CPUState *cpu = arg;
 
 
     assert(tcg_enabled());
     assert(tcg_enabled());
     g_assert(!icount_enabled());
     g_assert(!icount_enabled());
 
 
     rcu_register_thread();
     rcu_register_thread();
+    force_rcu.notifier.notify = mttcg_force_rcu;
+    force_rcu.cpu = cpu;
+    rcu_add_force_rcu_notifier(&force_rcu.notifier);
     tcg_register_thread();
     tcg_register_thread();
 
 
     qemu_mutex_lock_iothread();
     qemu_mutex_lock_iothread();
@@ -100,6 +125,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
 
 
     tcg_cpus_destroy(cpu);
     tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
     qemu_mutex_unlock_iothread();
+    rcu_remove_force_rcu_notifier(&force_rcu.notifier);
     rcu_unregister_thread();
     rcu_unregister_thread();
     return NULL;
     return NULL;
 }
 }

+ 10 - 0
accel/tcg/tcg-accel-ops-rr.c

@@ -28,6 +28,7 @@
 #include "sysemu/tcg.h"
 #include "sysemu/tcg.h"
 #include "sysemu/replay.h"
 #include "sysemu/replay.h"
 #include "qemu/main-loop.h"
 #include "qemu/main-loop.h"
+#include "qemu/notify.h"
 #include "qemu/guest-random.h"
 #include "qemu/guest-random.h"
 #include "exec/exec-all.h"
 #include "exec/exec-all.h"
 
 
@@ -133,6 +134,11 @@ static void rr_deal_with_unplugged_cpus(void)
     }
     }
 }
 }
 
 
+/*
+ * Force-RCU notifier callback for the round-robin TCG thread.
+ *
+ * Simply delegates to rr_kick_next_cpu(); neither @notify nor @data is
+ * consulted.
+ */
+static void rr_force_rcu(Notifier *notify, void *data)
+{
+    (void)notify;
+    (void)data;
+
+    rr_kick_next_cpu();
+}
+
 /*
 /*
  * In the single-threaded case each vCPU is simulated in turn. If
  * In the single-threaded case each vCPU is simulated in turn. If
  * there is more than a single vCPU we create a simple timer to kick
  * there is more than a single vCPU we create a simple timer to kick
@@ -143,10 +149,13 @@ static void rr_deal_with_unplugged_cpus(void)
 
 
 static void *rr_cpu_thread_fn(void *arg)
 static void *rr_cpu_thread_fn(void *arg)
 {
 {
+    Notifier force_rcu;
     CPUState *cpu = arg;
     CPUState *cpu = arg;
 
 
     assert(tcg_enabled());
     assert(tcg_enabled());
     rcu_register_thread();
     rcu_register_thread();
+    force_rcu.notify = rr_force_rcu;
+    rcu_add_force_rcu_notifier(&force_rcu);
     tcg_register_thread();
     tcg_register_thread();
 
 
     qemu_mutex_lock_iothread();
     qemu_mutex_lock_iothread();
@@ -255,6 +264,7 @@ static void *rr_cpu_thread_fn(void *arg)
         rr_deal_with_unplugged_cpus();
         rr_deal_with_unplugged_cpus();
     }
     }
 
 
+    rcu_remove_force_rcu_notifier(&force_rcu);
     rcu_unregister_thread();
     rcu_unregister_thread();
     return NULL;
     return NULL;
 }
 }

+ 7 - 0
hw/core/numa.c

@@ -756,6 +756,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[])
     PCDIMMDeviceInfo     *pcdimm_info;
     PCDIMMDeviceInfo     *pcdimm_info;
     VirtioPMEMDeviceInfo *vpi;
     VirtioPMEMDeviceInfo *vpi;
     VirtioMEMDeviceInfo *vmi;
     VirtioMEMDeviceInfo *vmi;
+    SgxEPCDeviceInfo *se;
 
 
     for (info = info_list; info; info = info->next) {
     for (info = info_list; info; info = info->next) {
         MemoryDeviceInfo *value = info->value;
         MemoryDeviceInfo *value = info->value;
@@ -781,6 +782,12 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[])
                 node_mem[vmi->node].node_mem += vmi->size;
                 node_mem[vmi->node].node_mem += vmi->size;
                 node_mem[vmi->node].node_plugged_mem += vmi->size;
                 node_mem[vmi->node].node_plugged_mem += vmi->size;
                 break;
                 break;
+            case MEMORY_DEVICE_INFO_KIND_SGX_EPC:
+                se = value->u.sgx_epc.data;
+                /* TODO: once we support numa, assign to right node */
+                node_mem[0].node_mem += se->size;
+                node_mem[0].node_plugged_mem += se->size;
+                break;
             default:
             default:
                 g_assert_not_reached();
                 g_assert_not_reached();
             }
             }

+ 1 - 0
hw/i386/sgx-epc.c

@@ -154,6 +154,7 @@ static void sgx_epc_class_init(ObjectClass *oc, void *data)
     dc->realize = sgx_epc_realize;
     dc->realize = sgx_epc_realize;
     dc->unrealize = sgx_epc_unrealize;
     dc->unrealize = sgx_epc_unrealize;
     dc->desc = "SGX EPC section";
     dc->desc = "SGX EPC section";
+    dc->user_creatable = false;
     device_class_set_props(dc, sgx_epc_properties);
     device_class_set_props(dc, sgx_epc_properties);
 
 
     mdc->get_addr = sgx_epc_md_get_addr;
     mdc->get_addr = sgx_epc_md_get_addr;

+ 50 - 0
hw/i386/sgx.c

@@ -21,6 +21,8 @@
 #include "qapi/qapi-commands-misc-target.h"
 #include "qapi/qapi-commands-misc-target.h"
 #include "exec/address-spaces.h"
 #include "exec/address-spaces.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/hw_accel.h"
+#include "sysemu/reset.h"
+#include <sys/ioctl.h>
 
 
 #define SGX_MAX_EPC_SECTIONS            8
 #define SGX_MAX_EPC_SECTIONS            8
 #define SGX_CPUID_EPC_INVALID           0x0
 #define SGX_CPUID_EPC_INVALID           0x0
@@ -29,6 +31,11 @@
 #define SGX_CPUID_EPC_SECTION           0x1
 #define SGX_CPUID_EPC_SECTION           0x1
 #define SGX_CPUID_EPC_MASK              0xF
 #define SGX_CPUID_EPC_MASK              0xF
 
 
+#define SGX_MAGIC 0xA4  /* ioctl type byte; NOTE(review): presumably mirrors the Linux SGX uapi header - confirm */
+#define SGX_IOC_VEPC_REMOVE_ALL       _IO(SGX_MAGIC, 0x04)  /* issued on each vEPC backend fd by sgx_epc_reset() */
+
+#define RETRY_NUM                       2  /* number of passes sgx_epc_reset() makes over the EPC sections */
+
 static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high)
 static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high)
 {
 {
     return (low & MAKE_64BIT_MASK(12, 20)) +
     return (low & MAKE_64BIT_MASK(12, 20)) +
@@ -59,6 +66,46 @@ static uint64_t sgx_calc_host_epc_section_size(void)
     return size;
     return size;
 }
 }
 
 
+/*
+ * Machine reset handler that empties every virtual EPC section.
+ *
+ * For each section recorded in pcms->sgx_epc, SGX_IOC_VEPC_REMOVE_ALL is
+ * issued on the file descriptor behind the section's host memory backend.
+ * A positive return value is treated as "SECS pages remain" and causes
+ * another pass over all sections, up to RETRY_NUM passes in total; sections
+ * that still fail on the last pass are reported with error_report().
+ *
+ * If the kernel does not implement the ioctl (errno == ENOTTY) a warning is
+ * printed once per process and the handler gives up, since retrying cannot
+ * help.  Other ioctl errors are ignored, as before.
+ *
+ * @opaque is unused (the handler is registered with NULL).
+ */
+static void sgx_epc_reset(void *opaque)
+{
+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+    HostMemoryBackend *hostmem;
+    SGXEPCDevice *epc;
+    int failures;
+    int fd, i, j, r;
+    static bool warned = false;
+
+    /*
+     * The second pass is needed to remove SECS pages that could not
+     * be removed during the first.
+     */
+    for (i = 0; i < RETRY_NUM; i++) {
+        failures = 0;
+        for (j = 0; j < pcms->sgx_epc.nr_sections; j++) {
+            epc = pcms->sgx_epc.sections[j];
+            hostmem = MEMORY_BACKEND(epc->hostmem);
+            fd = memory_region_get_fd(host_memory_backend_get_memory(hostmem));
+
+            r = ioctl(fd, SGX_IOC_VEPC_REMOVE_ALL);
+            if (r == -1 && errno == ENOTTY) {
+                /*
+                 * ioctl() reports failure as -1 with the reason in errno;
+                 * it never returns -ENOTTY directly, so comparing the
+                 * return value against -ENOTTY would never match.
+                 */
+                if (!warned) {
+                    warned = true;
+                    warn_report("kernel does not support SGX_IOC_VEPC_REMOVE_ALL");
+                    warn_report("SGX might operate incorrectly in the guest after reset");
+                }
+                return;
+            }
+            if (r > 0) {
+                /* SECS pages remain */
+                failures++;
+                if (i == RETRY_NUM - 1) {
+                    /* Last pass: nothing more will be attempted. */
+                    error_report("cannot reset vEPC section %d", j);
+                }
+            }
+        }
+        if (!failures) {
+            break;
+        }
+    }
+}
+
 SGXInfo *qmp_query_sgx_capabilities(Error **errp)
 SGXInfo *qmp_query_sgx_capabilities(Error **errp)
 {
 {
     SGXInfo *info = NULL;
     SGXInfo *info = NULL;
@@ -190,4 +237,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
     }
     }
 
 
     memory_region_set_size(&sgx_epc->mr, sgx_epc->size);
     memory_region_set_size(&sgx_epc->mr, sgx_epc->size);
+
+    /* register the reset callback for sgx epc */
+    qemu_register_reset(sgx_epc_reset, NULL);
 }
 }

+ 15 - 0
include/qemu/rcu.h

@@ -27,6 +27,7 @@
 #include "qemu/thread.h"
 #include "qemu/thread.h"
 #include "qemu/queue.h"
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 #include "qemu/sys_membarrier.h"
 #include "qemu/sys_membarrier.h"
 
 
 #ifdef __cplusplus
 #ifdef __cplusplus
@@ -66,6 +67,13 @@ struct rcu_reader_data {
 
 
     /* Data used for registry, protected by rcu_registry_lock */
     /* Data used for registry, protected by rcu_registry_lock */
     QLIST_ENTRY(rcu_reader_data) node;
     QLIST_ENTRY(rcu_reader_data) node;
+
+    /*
+     * NotifierList used to force an RCU grace period.  Accessed under
+     * rcu_registry_lock.  Note that the notifier is called _outside_
+     * the thread!
+     */
+    NotifierList force_rcu;
 };
 };
 
 
 extern __thread struct rcu_reader_data rcu_reader;
 extern __thread struct rcu_reader_data rcu_reader;
@@ -180,6 +188,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
 #define RCU_READ_LOCK_GUARD() \
 #define RCU_READ_LOCK_GUARD() \
     g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
     g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
 
 
+/*
+ * Force-RCU notifiers tell readers that they should exit their
+ * read-side critical section.
+ *
+ * rcu_add_force_rcu_notifier() attaches @n to the calling thread's
+ * rcu_reader.force_rcu list; rcu_remove_force_rcu_notifier() detaches it
+ * again.  Both take rcu_registry_lock internally.  The notifier itself is
+ * invoked from outside the registering thread.
+ */
+void rcu_add_force_rcu_notifier(Notifier *n);
+void rcu_remove_force_rcu_notifier(Notifier *n);
+
 #ifdef __cplusplus
 #ifdef __cplusplus
 }
 }
 #endif
 #endif

+ 19 - 0
util/rcu.c

@@ -46,6 +46,7 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 
 QemuEvent rcu_gp_event;
 QemuEvent rcu_gp_event;
+static int in_drain_call_rcu;
 static QemuMutex rcu_registry_lock;
 static QemuMutex rcu_registry_lock;
 static QemuMutex rcu_sync_lock;
 static QemuMutex rcu_sync_lock;
 
 
@@ -107,6 +108,8 @@ static void wait_for_readers(void)
                  * get some extra futex wakeups.
                  * get some extra futex wakeups.
                  */
                  */
                 qatomic_set(&index->waiting, false);
                 qatomic_set(&index->waiting, false);
+            } else if (qatomic_read(&in_drain_call_rcu)) {
+                notifier_list_notify(&index->force_rcu, NULL);
             }
             }
         }
         }
 
 
@@ -339,8 +342,10 @@ void drain_call_rcu(void)
      * assumed.
      * assumed.
      */
      */
 
 
+    qatomic_inc(&in_drain_call_rcu);
     call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
     call_rcu1(&rcu_drain.rcu, drain_rcu_callback);
     qemu_event_wait(&rcu_drain.drain_complete_event);
     qemu_event_wait(&rcu_drain.drain_complete_event);
+    qatomic_dec(&in_drain_call_rcu);
 
 
     if (locked) {
     if (locked) {
         qemu_mutex_lock_iothread();
         qemu_mutex_lock_iothread();
@@ -363,6 +368,20 @@ void rcu_unregister_thread(void)
     qemu_mutex_unlock(&rcu_registry_lock);
     qemu_mutex_unlock(&rcu_registry_lock);
 }
 }
 
 
+/*
+ * Attach @n to the calling thread's force_rcu notifier list.
+ *
+ * rcu_reader is thread-local, so the notifier is associated with whichever
+ * thread makes this call.  The list is modified with rcu_registry_lock held.
+ */
+void rcu_add_force_rcu_notifier(Notifier *n)
+{
+    NotifierList *list = &rcu_reader.force_rcu;
+
+    qemu_mutex_lock(&rcu_registry_lock);
+    notifier_list_add(list, n);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+/*
+ * Detach @n from the notifier list it was added to.
+ *
+ * The removal is performed with rcu_registry_lock held, the same lock
+ * under which the force_rcu lists are documented to be accessed.
+ */
+void rcu_remove_force_rcu_notifier(Notifier *n)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    notifier_remove(n);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
 static void rcu_init_complete(void)
 static void rcu_init_complete(void)
 {
 {
     QemuThread thread;
     QemuThread thread;